@brainpilot/skills 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/package.json +2 -2
  2. package/skills/01_Meta-Skills/academic-research-hub/SKILL.md +108 -0
  3. package/skills/01_Meta-Skills/academic-research-hub/scripts/requirements.txt +17 -0
  4. package/skills/01_Meta-Skills/academic-research-hub/scripts/research.py +781 -0
  5. package/skills/01_Meta-Skills/beautiful-log/SKILL.md +64 -0
  6. package/skills/01_Meta-Skills/beautiful-log/scripts/beautiful_log.py +274 -0
  7. package/skills/01_Meta-Skills/ethoclaw-daily-paper/SKILL.md +130 -0
  8. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/config.template.yaml +54 -0
  9. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/top5_digest_template.md +5 -0
  10. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/build_top5_digest.py +300 -0
  11. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/common.py +137 -0
  12. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/merge_results.py +106 -0
  13. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/run_pipeline.py +177 -0
  14. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_arxiv.py +162 -0
  15. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_pubmed.py +202 -0
  16. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/SKILL.md +173 -0
  17. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/scripts/normalize_data.py +874 -0
  18. package/skills/01_Meta-Skills/ethoclaw-pdf-research/SKILL.md +134 -0
  19. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/confirmation-prompts.md +31 -0
  20. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/output-patterns.md +45 -0
  21. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_markdown_deliverables.py +41 -0
  22. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_research_log.py +84 -0
  23. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_summary_md.py +63 -0
  24. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/extract_pdf_bundle.py +140 -0
  25. package/skills/01_Meta-Skills/experiment-controller/SKILL.md +140 -0
  26. package/skills/01_Meta-Skills/knowledge-graph-builder/SKILL.md +366 -0
  27. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/entity_resolution.py +120 -0
  28. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/extraction_prompt_template.txt +19 -0
  29. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/graph_query.py +106 -0
  30. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/hypothesis_cli_reference.py +42 -0
  31. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/new_data_source_template.py +116 -0
  32. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/requirements.txt +15 -0
  33. package/skills/01_Meta-Skills/method-design/SKILL.md +61 -0
  34. package/skills/01_Meta-Skills/multi-search-engine/SKILL.md +119 -0
  35. package/skills/01_Meta-Skills/research-idea/SKILL.md +65 -0
  36. package/skills/05_EEG_ERP/eeg-skill/SKILL.md +197 -0
  37. package/skills/05_EEG_ERP/meg-skill/SKILL.md +188 -0
  38. package/skills/05_EEG_ERP/meg-skill/scripts/time_frequency.py +223 -0
  39. package/skills/05_EEG_ERP/mne-eeg-tool/SKILL.md +165 -0
  40. package/skills/05_EEG_ERP/mne-eeg-tool/scripts/eeg_pipeline_reference.py +231 -0
  41. package/skills/05_EEG_ERP/seed-iv-skill/SKILL.md +184 -0
  42. package/skills/05_EEG_ERP/seed-iv-skill/scripts/classify_seed_iv.py +154 -0
  43. package/skills/05_EEG_ERP/seed-iv-skill/scripts/extract_seed_iv_features.py +190 -0
  44. package/skills/05_EEG_ERP/seed-iv-skill/scripts/validate_seed_iv.py +102 -0
  45. package/skills/05_EEG_ERP/seed-vig-skill/SKILL.md +182 -0
  46. package/skills/05_EEG_ERP/seed-vig-skill/scripts/classify_seed_vig.py +165 -0
  47. package/skills/05_EEG_ERP/seed-vig-skill/scripts/extract_seed_vig_features.py +185 -0
  48. package/skills/05_EEG_ERP/seed-vig-skill/scripts/validate_seed_vig.py +88 -0
  49. package/skills/06_fMRI_Neuroimaging/abcd-skill/SKILL.md +308 -0
  50. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/abcd_qc_summary.py +449 -0
  51. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/extract_abcd_phenotype.py +292 -0
  52. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/reorganize_abcd.py +387 -0
  53. package/skills/06_fMRI_Neuroimaging/abide-skill/SKILL.md +302 -0
  54. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/abide_qc_summary.py +317 -0
  55. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/extract_abide_phenotype.py +267 -0
  56. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/reorganize_abide.py +387 -0
  57. package/skills/06_fMRI_Neuroimaging/adhd200-skill/SKILL.md +244 -0
  58. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/adhd200_qc_summary.py +98 -0
  59. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/extract_adhd200_phenotype.py +134 -0
  60. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/reorganize_adhd200.py +206 -0
  61. package/skills/06_fMRI_Neuroimaging/adni-skill/SKILL.md +358 -0
  62. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_adni_task_files.py +1305 -0
  63. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_vqa_from_tasks.py +766 -0
  64. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/reorganize_adni.py +491 -0
  65. package/skills/06_fMRI_Neuroimaging/aibl-skill/SKILL.md +295 -0
  66. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/aibl_qc_summary.py +260 -0
  67. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/extract_aibl_phenotype.py +365 -0
  68. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/reorganize_aibl.py +394 -0
  69. package/skills/06_fMRI_Neuroimaging/aomic-skill/SKILL.md +292 -0
  70. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/aomic_qc_summary.py +258 -0
  71. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/extract_aomic_phenotype.py +284 -0
  72. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/reorganize_aomic.py +322 -0
  73. package/skills/06_fMRI_Neuroimaging/asl-skill/SKILL.md +168 -0
  74. package/skills/06_fMRI_Neuroimaging/asl-skill/scripts/compute_cbf.py +224 -0
  75. package/skills/06_fMRI_Neuroimaging/bids-organizer/SKILL.md +241 -0
  76. package/skills/06_fMRI_Neuroimaging/bold5000-skill/SKILL.md +186 -0
  77. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/bold5000_qc_summary.py +96 -0
  78. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/extract_bold5000_stimulus.py +125 -0
  79. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/reorganize_bold5000.py +102 -0
  80. package/skills/06_fMRI_Neuroimaging/camcan-skill/SKILL.md +213 -0
  81. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/camcan_qc_summary.py +131 -0
  82. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/extract_camcan_phenotype.py +145 -0
  83. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/validate_camcan.py +141 -0
  84. package/skills/06_fMRI_Neuroimaging/cobre-skill/SKILL.md +201 -0
  85. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/cobre_qc_summary.py +95 -0
  86. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/extract_cobre_phenotype.py +104 -0
  87. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/reorganize_cobre.py +140 -0
  88. package/skills/06_fMRI_Neuroimaging/conn-tool/SKILL.md +180 -0
  89. package/skills/06_fMRI_Neuroimaging/dcm2nii/SKILL.md +189 -0
  90. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/SKILL.md +183 -0
  91. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/dmt_har_med_qc_summary.py +96 -0
  92. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/extract_dmt_har_med_phenotype.py +121 -0
  93. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/reorganize_dmt_har_med.py +125 -0
  94. package/skills/06_fMRI_Neuroimaging/dwi-skill/SKILL.md +359 -0
  95. package/skills/06_fMRI_Neuroimaging/fmri-skill/SKILL.md +371 -0
  96. package/skills/06_fMRI_Neuroimaging/fmriprep-tool/SKILL.md +228 -0
  97. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/SKILL.md +286 -0
  98. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/scripts/freesurfer_processor.py +145 -0
  99. package/skills/06_fMRI_Neuroimaging/fsl-tool/SKILL.md +208 -0
  100. package/skills/06_fMRI_Neuroimaging/hbn-skill/SKILL.md +271 -0
  101. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/extract_hbn_phenotype.py +107 -0
  102. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/hbn_qc_summary.py +96 -0
  103. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/reorganize_hbn.py +150 -0
  104. package/skills/06_fMRI_Neuroimaging/hcpa-skill/SKILL.md +210 -0
  105. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/extract_hcpa_phenotype.py +146 -0
  106. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/hcpa_qc_summary.py +120 -0
  107. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/reorganize_hcpa.py +155 -0
  108. package/skills/06_fMRI_Neuroimaging/hcpd-skill/SKILL.md +210 -0
  109. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/extract_hcpd_phenotype.py +148 -0
  110. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/hcpd_qc_summary.py +125 -0
  111. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/reorganize_hcpd.py +146 -0
  112. package/skills/06_fMRI_Neuroimaging/hcpep-skill/SKILL.md +215 -0
  113. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/extract_hcpep_phenotype.py +157 -0
  114. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/hcpep_qc_summary.py +143 -0
  115. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/reorganize_hcpep.py +146 -0
  116. package/skills/06_fMRI_Neuroimaging/hcppipeline-tool/SKILL.md +217 -0
  117. package/skills/06_fMRI_Neuroimaging/hcpya-skill/SKILL.md +214 -0
  118. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/extract_hcpya_phenotype.py +190 -0
  119. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/hcpya_qc_summary.py +152 -0
  120. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/reorganize_hcpya.py +203 -0
  121. package/skills/06_fMRI_Neuroimaging/ixi-skill/SKILL.md +198 -0
  122. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/ixi_qc_summary.py +137 -0
  123. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/reorganize_ixi.py +190 -0
  124. package/skills/06_fMRI_Neuroimaging/mnd-skill/SKILL.md +191 -0
  125. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/extract_mnd_phenotype.py +143 -0
  126. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/mnd_qc_summary.py +120 -0
  127. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/validate_mnd.py +107 -0
  128. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/SKILL.md +203 -0
  129. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/analyze_lesions.py +119 -0
  130. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/longitudinal_lesion.py +148 -0
  131. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/mschallenge_qc_summary.py +132 -0
  132. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/validate_mschallenge.py +116 -0
  133. package/skills/06_fMRI_Neuroimaging/nibabel-skill/SKILL.md +184 -0
  134. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/atlas_coordinate_reference.py +61 -0
  135. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/freesurfer_io_reference.py +34 -0
  136. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/nifti_inspection_reference.py +35 -0
  137. package/skills/06_fMRI_Neuroimaging/nifd-skill/SKILL.md +205 -0
  138. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/extract_nifd_phenotype.py +132 -0
  139. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/nifd_qc_summary.py +111 -0
  140. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/validate_nifd.py +111 -0
  141. package/skills/06_fMRI_Neuroimaging/nii2dcm/SKILL.md +143 -0
  142. package/skills/06_fMRI_Neuroimaging/nilearn-tool/SKILL.md +266 -0
  143. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/connectome_reference.py +65 -0
  144. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/denoise_timeseries_reference.py +58 -0
  145. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/hierarchical_parcellation_reference.py +53 -0
  146. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/kmeans_parcellation_reference.py +53 -0
  147. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/preprocess_bold_reference.py +76 -0
  148. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_dictlearning_reference.py +56 -0
  149. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_ica_reference.py +59 -0
  150. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/second_level_glm_reference.py +58 -0
  151. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/spacenet_classifier_reference.py +59 -0
  152. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/svm_classifier_reference.py +60 -0
  153. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/task_glm_reference.py +63 -0
  154. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/zalff_summary_reference.py +109 -0
  155. package/skills/06_fMRI_Neuroimaging/nsd-skill/SKILL.md +210 -0
  156. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/extract_nsd_stimulus.py +171 -0
  157. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/nsd_qc_summary.py +142 -0
  158. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/validate_nsd.py +142 -0
  159. package/skills/06_fMRI_Neuroimaging/oasis-skill/SKILL.md +205 -0
  160. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/extract_oasis_phenotype.py +126 -0
  161. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/oasis_qc_summary.py +115 -0
  162. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/validate_oasis.py +119 -0
  163. package/skills/06_fMRI_Neuroimaging/pet-skill/SKILL.md +173 -0
  164. package/skills/06_fMRI_Neuroimaging/pet-skill/scripts/compute_suvr.py +202 -0
  165. package/skills/06_fMRI_Neuroimaging/pnc-skill/SKILL.md +206 -0
  166. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/extract_pnc_phenotype.py +136 -0
  167. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/pnc_qc_summary.py +116 -0
  168. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/validate_pnc.py +120 -0
  169. package/skills/06_fMRI_Neuroimaging/ppmi-skill/SKILL.md +209 -0
  170. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/extract_ppmi_phenotype.py +138 -0
  171. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/ppmi_qc_summary.py +111 -0
  172. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/validate_ppmi.py +117 -0
  173. package/skills/06_fMRI_Neuroimaging/qsiprep-tool/SKILL.md +320 -0
  174. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/SKILL.md +215 -0
  175. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/extract_rest_mdd_phenotype.py +132 -0
  176. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/harmonize_sites.py +152 -0
  177. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/rest_mdd_qc_summary.py +124 -0
  178. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/validate_rest_mdd.py +103 -0
  179. package/skills/06_fMRI_Neuroimaging/smri-skill/SKILL.md +302 -0
  180. package/skills/06_fMRI_Neuroimaging/tcp-skill/SKILL.md +204 -0
  181. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/extract_tcp_phenotype.py +139 -0
  182. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/tcp_qc_summary.py +111 -0
  183. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/validate_tcp.py +99 -0
  184. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/SKILL.md +217 -0
  185. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/extract_ucla_cnp_phenotype.py +145 -0
  186. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/ucla_cnp_qc_summary.py +111 -0
  187. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/validate_ucla_cnp.py +113 -0
  188. package/skills/06_fMRI_Neuroimaging/ukb-skill/SKILL.md +310 -0
  189. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/build_ukb_survival.py +210 -0
  190. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_cases.py +308 -0
  191. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_phenotype.py +232 -0
  192. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/ukb_qc_summary.py +158 -0
  193. package/skills/06_fMRI_Neuroimaging/wmh-segmentation/SKILL.md +133 -0
  194. package/skills/07_Computational_Modeling/detrending/SKILL.md +118 -0
  195. package/skills/07_Computational_Modeling/dictlearning/SKILL.md +122 -0
  196. package/skills/07_Computational_Modeling/filtering/SKILL.md +121 -0
  197. package/skills/07_Computational_Modeling/glm/SKILL.md +153 -0
  198. package/skills/07_Computational_Modeling/hierarchical/SKILL.md +121 -0
  199. package/skills/07_Computational_Modeling/ica/SKILL.md +122 -0
  200. package/skills/07_Computational_Modeling/kmeans/SKILL.md +119 -0
  201. package/skills/07_Computational_Modeling/run_models/SKILL.md +427 -0
  202. package/skills/07_Computational_Modeling/spacenet/SKILL.md +122 -0
  203. package/skills/07_Computational_Modeling/svm/SKILL.md +120 -0
  204. package/skills/08_Computational_Neuroscience/brain_gnn/SKILL.md +183 -0
  205. package/skills/08_Computational_Neuroscience/dipy-tool/SKILL.md +239 -0
  206. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/dti_metrics_reference.py +70 -0
  207. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/load_and_mask_reference.py +76 -0
  208. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/roi_stats_reference.py +59 -0
  209. package/skills/08_Computational_Neuroscience/fm_app/SKILL.md +195 -0
  210. package/skills/08_Computational_Neuroscience/neurostorm/SKILL.md +151 -0
  211. package/skills/13_Visualization/brain-visualization/SKILL.md +191 -0
  212. package/skills/13_Visualization/brain-visualization/scripts/connectome_reference.py +108 -0
  213. package/skills/13_Visualization/brain-visualization/scripts/freesurfer_ply_reference.py +54 -0
  214. package/skills/13_Visualization/brain-visualization/scripts/zalff_summary_reference.py +116 -0
  215. package/skills/13_Visualization/ethoclaw-paper-figure-layout/SKILL.md +78 -0
  216. package/skills/13_Visualization/ethoclaw-paper-figure-layout/assets/naturecomm_figures.tex +74 -0
  217. package/skills/13_Visualization/ethoclaw-paper-figure-layout/scripts/layout_results_foldered.py +579 -0
  218. package/skills/14_Writing/overleaf-skill/SKILL.md +184 -0
  219. package/skills/14_Writing/overleaf-skill/scripts/install.sh +30 -0
  220. package/skills/14_Writing/paper-writing/SKILL.md +146 -0
  221. package/skills/14_Writing/paper-writing/scripts/data_statement_templates.py +164 -0
  222. package/skills/14_Writing/paper-writing/scripts/figure_templates.py +315 -0
  223. package/skills/14_Writing/paper-writing/scripts/nature_figure_style.py +214 -0
  224. package/skills/14_Writing/paper-writing/scripts/section_phrasebank.py +246 -0
  225. package/skills/16_Animal_Behavior/deeplabcut/SKILL.md +154 -0
  226. package/skills/16_Animal_Behavior/deeplabcut/references/3d-pose.md +89 -0
  227. package/skills/16_Animal_Behavior/deeplabcut/references/maDLC.md +123 -0
  228. package/skills/16_Animal_Behavior/deeplabcut/references/modelzoo.md +98 -0
  229. package/skills/16_Animal_Behavior/deeplabcut/references/standard-pipeline.md +165 -0
  230. package/skills/16_Animal_Behavior/deeplabcut/references/utilities.md +146 -0
  231. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/SKILL.md +274 -0
  232. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.html +112 -0
  233. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.md +21 -0
  234. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/cluster-section.md +5 -0
  235. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/heatmap-section.md +5 -0
  236. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/integrated-interpretation.md +3 -0
  237. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/overview.md +3 -0
  238. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/project-summary.md +3 -0
  239. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/radar-section.md +5 -0
  240. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/raw-trajectory.md +3 -0
  241. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/sample-check.md +3 -0
  242. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/single-subject-section.md +3 -0
  243. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/stats-section.md +5 -0
  244. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/epm.md +52 -0
  245. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/fst.md +37 -0
  246. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/nor.md +39 -0
  247. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/oft.md +43 -0
  248. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tcst.md +45 -0
  249. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tst.md +36 -0
  250. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/input-types.md +59 -0
  251. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/interpretation-guardrails.md +45 -0
  252. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/metadata-schema.md +57 -0
  253. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/report-sections.md +86 -0
  254. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/section-selection-rules.md +169 -0
  255. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/build_report_manifest.py +27 -0
  256. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/render_report.py +34 -0
  257. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/report_utils.py +1121 -0
  258. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/SKILL.md +390 -0
  259. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/reference_code.py +98 -0
  260. package/skills/16_Animal_Behavior/ethoclaw-animal-pose-estimation/SKILL.md +336 -0
  261. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/README.md +21 -0
  262. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/SKILL.md +41 -0
  263. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/batch_kinematic_generator.py +663 -0
  264. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/config.json +19 -0
  265. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/generate_kinematic_parameter.py +401 -0
  266. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/kinematic_generator.py +265 -0
  267. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/SKILL.md +72 -0
  268. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/references/config.example.toml +56 -0
  269. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params.py +232 -0
  270. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params_from_config.py +236 -0
  271. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/SKILL.md +68 -0
  272. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/references/notes.md +5 -0
  273. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/scripts/plot_h5_radar.py +513 -0
  274. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/SKILL.md +52 -0
  275. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/config.toml +81 -0
  276. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/references/stats-rule.md +18 -0
  277. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_inspect.py +79 -0
  278. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_batch.py +624 -0
  279. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_stats.py +438 -0
  280. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/SKILL.md +280 -0
  281. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_trajectory.py +790 -0
  282. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_velocity.py +855 -0
  283. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.csv +101 -0
  284. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.h5 +0 -0
  285. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_data_readme.md +126 -0
@@ -0,0 +1,366 @@
1
+ ---
2
+ name: knowledge-graph-builder
3
+ description: "Use this skill when users need to build, populate, or extend a domain-specific knowledge graph from literature and structured databases. Triggers include: 'build knowledge graph', 'extract claims from papers', 'ingest data into graph', 'batch extract claims', 'knowledge graph construction', 'populate graph from PubMed', 'extract structured claims', 'ingest atlas data', or any request involving knowledge graph population from scientific literature or biomedical databases. Covers both structured data ingestion (Phase 1) and LLM-based claim extraction from papers (Phase 2)."
4
+ license: MIT License (NeuroClaw custom skill – freely modifiable within the project)
5
+ layer: base
6
+ skill_type: tool
7
+ dependencies:
8
+ - multi-search-engine
9
+ - academic-research-hub
10
+ ---
11
+ # Knowledge Graph Builder
12
+
13
+ ## Overview
14
+
15
+ This skill provides a reusable framework for constructing domain-specific knowledge graphs by combining three complementary phases:
16
+
17
+ - **Phase 1 — Structured Ingestion**: Import concepts and relations from curated databases, ontologies, and brain atlases (e.g., NeuroNames, MeSH, DisGeNET, Cognitive Atlas, Nilearn atlases).
18
+ - **Phase 2 — Literature Claim Extraction**: Use LLMs to extract structured scientific claims from PubMed paper abstracts, then resolve entities and ingest into the graph.
19
+ - **Phase 3 — Hypothesis Engine**: Traverse the graph to find novel connections, contradictions, and unexplored gaps — turning raw claims into testable research hypotheses.
20
+
21
+ The output is a directed knowledge graph (NetworkX DiGraph + JSON serialization) where nodes represent domain concepts and claims, and edges represent typed relationships with confidence scores and provenance.
22
+
23
+ **Primary implementation**: `core/knowledge_graph/` in the NeuroClaw project.
24
+
25
+ ## Architecture
26
+
27
+ ```
28
+                     ┌─────────────────────┐
29
+                     │   Knowledge Graph   │
30
+                     │ (NetworkX DiGraph)  │
31
+                     └──────┬──────────────┘
32
+                            │
33
+          ┌─────────────────┼─────────────────┐
34
+          │                 │                 │
35
+ ┌────────▼────────┐ ┌─────▼──────────┐ ┌────▼─────────────┐
36
+ │ Phase 1:        │ │ Phase 2:       │ │ Phase 3:         │
37
+ │ Structured      │ │ Literature     │ │ Hypothesis       │
38
+ │ Data Ingestion  │ │ Claim Extract  │ │ Engine           │
39
+ └────────┬────────┘ └──────┬─────────┘ └────┬─────────────┘
40
+          │                 │                │
41
+ ┌────────▼────────┐ ┌──────▼─────────┐ ┌────▼─────────────┐
42
+ │ - NeuroNames    │ │ - PubMed search│ │ - Path finding   │
43
+ │ - MeSH          │ │ - LLM extract  │ │ - Bridge discover│
44
+ │ - DisGeNET      │ │ - Entity resol │ │ - Contradictions │
45
+ │ - Cognitive Atl │ │ - Claim ingest │ │ - Gap detection  │
46
+ │ - Nilearn atlas │ │                │ │ - Ranking        │
47
+ └─────────────────┘ └────────────────┘ └──────────────────┘
48
+ ```
49
+
50
+ ## Key Design Decisions (Lessons Learned)
51
+
52
+ ### 1. Schema Design: Three-Tier Nodes
53
+
54
+ The graph uses three schema elements — two node types (ConceptNode and Claim) plus typed edges — all stored in the same DiGraph:
55
+
56
+ | Node Type | ID Format | Purpose |
57
+ |-----------|-----------|---------|
58
+ | **ConceptNode** | `NN:1234`, `CUI:xxx`, `MESH:D0001` | Domain concepts (brain regions, diseases, genes, drugs) |
59
+ | **Claim** | `CLM:abc123def456` | Structured scientific claims extracted from papers |
60
+ | **Edge** | (implicit) | Typed relationships between any two nodes |
61
+
62
+ **Why this matters**: Claims are stored as nodes (not just edges) so they can carry full metadata (evidence, p-value, sample size, conditions, population). Simplified edges are also generated for fast traversal.
63
+
64
+ ### 2. Entity Resolution: 5-Level Matching
65
+
66
+ When ingesting claims, entity names must be resolved to existing concept IDs. Use a cascading strategy:
67
+
68
+ 1. **Exact match** on preferred_name
69
+ 2. **Case-insensitive** match
70
+ 3. **Alias match** (check synonyms)
71
+ 4. **Substring match** (entity contained in name or vice versa, prefer shortest name)
72
+ 5. **Create new** concept if no match found
73
+
74
+ **Why not just use embeddings?** For small-to-medium graphs (<100K concepts), string matching is fast and predictable. SapBERT/FAISS alignment is recommended when UMLS is available and the graph exceeds 100K concepts.
75
+
76
+ ### 3. LLM Extraction: Keep Prompts Short
77
+
78
+ LLMs (especially via proxy endpoints) may return empty responses when the prompt plus the expected output exceeds the model's context window. Hard-won rules:
79
+
80
+ - **Truncate abstracts** to 2000 chars before sending to LLM
81
+ - **Keep extraction prompts concise** — list field names and allowed values, not verbose descriptions
82
+ - **Use `max_tokens=8192`** — 4096 is too small for papers with many claims
83
+ - **Fix `[[` double brackets** — a common LLM error when outputting JSON arrays
84
+ - **Temperature=0.1** for extraction consistency
85
+
86
+ ### 4. Contextualized Triplets (MDKG-style)
87
+
88
+ Beyond simple (subject, predicate, object), extract:
89
+
90
+ - **Conditions**: list of conditions under which the claim holds (e.g., `["female only", "age > 65"]`)
91
+ - **Population**: study demographics (mean_age, gender distribution, sample size, cohort name)
92
+
93
+ This enables more nuanced graph queries and downstream hypothesis generation.
94
+
95
+ ### 5. Checkpoint/Resume for Batch Jobs
96
+
97
+ Large-scale extraction (10 diseases x 27 years x 20 papers = 5400 papers) takes 10-60 hours. Always implement:
98
+
99
+ - Save checkpoint after each batch (disease+year)
100
+ - Track completed_diseases and completed_years
101
+ - Save graph periodically (after every 5 processed publication years, or after each disease)
102
+ - CSV export of paper metadata for audit trail
103
+
104
+ ## Quick Reference
105
+
106
+ | Task | Command |
107
+ |------|---------|
108
+ | Ingest atlas data (Phase 1) | `python -m core.knowledge_graph.ingest_pipeline` |
109
+ | Generate brain atlas TSV | `python core/knowledge_graph/data/raw/generate_brain_atlas_nilearn.py` |
110
+ | Run single-disease extraction | `python -m core.knowledge_graph.batch_extract --diseases "Alzheimer's disease" --year-start 2024 --year-end 2024 --papers-per-year 5` |
111
+ | Run full batch extraction | `python -m core.knowledge_graph.batch_extract` |
112
+ | Resume from checkpoint | `python -m core.knowledge_graph.batch_extract` (auto-resumes) |
113
+ | Start fresh (ignore checkpoint) | `python -m core.knowledge_graph.batch_extract --no-resume` |
114
+ | Verbose logging | `python -m core.knowledge_graph.batch_extract -v` |
115
+ | Query graph stats | `python -c "from core.knowledge_graph import load_graph; g = load_graph(); print(g.stats())"` |
116
+ | **Batch generate hypotheses** | `python -m core.knowledge_graph.hypothesis_cli batch --output data/hypotheses.json` |
117
+ | **Rank saved hypotheses** | `python -m core.knowledge_graph.hypothesis_cli rank --input data/hypotheses.json --top 20` |
118
+ | Find hypothesis paths | `python -m core.knowledge_graph.hypothesis_cli paths "hippocampus" "Alzheimer Disease"` |
119
+ | Bridge discovery | `python -m core.knowledge_graph.hypothesis_cli bridge "hippocampus" --target-domain disease` |
120
+ | **Discover from concept** | `python -m core.knowledge_graph.hypothesis_cli discover "Alzheimer" --max-hops 3` |
121
+ | **Find trending evidence** | `python -m core.knowledge_graph.hypothesis_cli trending --since 2020 --direction strengthening` |
122
+ | Find contradictions | `python -m core.knowledge_graph.hypothesis_cli contradictions` |
123
+ | Detect gaps | `python -m core.knowledge_graph.hypothesis_cli gaps --domain-a neuroanatomy --domain-b disease` |
124
+ | Explore a concept | `python -m core.knowledge_graph.hypothesis_cli explore "hippocampus"` |
125
+
126
+ ## Agent Reference Rule
127
+
128
+ When the agent needs knowledge graph implementation code, it should first consult the curated snippets in `skills/knowledge-graph-builder/scripts/` instead of writing from scratch.
129
+
130
+ Reference snippets available:
131
+ - `scripts/entity_resolution.py` → EntityResolver class with 5-level matching
132
+ - `scripts/graph_query.py` → CLI tool for graph queries (stats, search, neighbors, paths, domain)
133
+ - `scripts/hypothesis_cli_reference.py` → Hypothesis engine usage patterns (executable code in `core/knowledge_graph/hypothesis_cli.py`)
134
+ - `scripts/extraction_prompt_template.txt` → LLM extraction prompt template
135
+ - `scripts/new_data_source_template.py` → Template for adding new data sources
136
+
137
+ ## Installation
138
+
139
+ ```bash
140
+ # Core dependencies
141
+ pip install networkx requests openai
142
+
143
+ # Optional: for atlas generation
144
+ pip install nilearn nibabel
145
+
146
+ # Optional: for Biopython Entrez (PubMed)
147
+ pip install biopython
148
+
149
+ # Use the neuroclaw conda environment (activate it first so the pip installs above land in it)
150
+ conda activate neuroclaw
151
+ ```
152
+
153
+ ## Phase 1: Structured Data Ingestion
154
+
155
+ ### Supported Data Sources
156
+
157
+ | Source | Data Type | Entity Type | Edge Type |
158
+ |--------|-----------|-------------|-----------|
159
+ | NeuroNames / Nilearn atlases | Brain region hierarchy | neuroanatomy | part_of |
160
+ | MeSH (desc*.xml) | Medical subject headings | disease, anatomy | is_a |
161
+ | DisGeNET (TSV) | Gene-disease associations | gene | gene_associated_with_disease |
162
+ | Cognitive Atlas (API) | Tasks, concepts, disorders | cognitive_function, paradigm | — |
163
+ | UMLS (pending) | Unified medical language system | all types | various |
164
+
165
+ ### Adding a New Data Source
166
+
167
+ Create a new file in `core/knowledge_graph/ingestion/` following this pattern:
168
+
169
+ ```python
170
+ """Ingest data from [SOURCE] into the knowledge graph."""
171
+
172
+ from ..schema import ConceptNode, Edge, DomainTag
173
+ from ..graph_manager import KnowledgeGraph
174
+
175
+ def ingest_source(kg: KnowledgeGraph, data_path: str) -> dict:
176
+ """Parse source data and add to graph.
177
+
178
+ Returns summary dict with counts.
179
+ """
180
+ concepts_added = 0
181
+ edges_added = 0
182
+
183
+ # 1. Parse raw data
184
+ records = parse_data(data_path)
185
+
186
+ # 2. Create ConceptNodes
187
+ for record in records:
188
+ node = ConceptNode(
189
+ id=record["id"],
190
+ preferred_name=record["name"],
191
+ domain_tags=[DomainTag.DISEASE.value],
192
+ source_vocab="my_source",
193
+ aliases=record.get("synonyms", []),
194
+ )
195
+ kg.add_concept(node)
196
+ concepts_added += 1
197
+
198
+ # 3. Create Edges (if hierarchical)
199
+ for record in records:
200
+ if record.get("parent_id"):
201
+ edge = Edge(
202
+ source_id=record["id"],
203
+ target_id=record["parent_id"],
204
+ relation_type="is_a",
205
+ source="my_source",
206
+ )
207
+ kg.add_edge(edge)
208
+ edges_added += 1
209
+
210
+ return {"concepts_added": concepts_added, "edges_added": edges_added}
211
+ ```
212
+
213
+ ### Atlas Generation (Nilearn)
214
+
215
+ Use `scripts/generate_atlas.py` as a template for generating brain region hierarchies from Nilearn built-in atlases. Key points:
216
+
217
+ - Start with a manual hierarchy of core brain regions (~100-200)
218
+ - Augment with atlas labels (Talairach, Harvard-Oxford, AAL, Dosenbach, Pauli, Seitzman)
219
+ - Handle SSL issues by patching `requests.Session.verify` before Nilearn calls
220
+ - Output: TSV with columns: NN_ID, Name, Latin_Name, Synonyms, Parent_ID, Brodmann_area
221
+
222
+ ## Phase 2: Literature Claim Extraction
223
+
224
+ ### Pipeline: PubMed Search → LLM Extraction → Entity Resolution → Ingestion
225
+
226
+ ```
227
+ PubMed query → PMIDs → XML parse → (abstract, PaperRef)
228
+
229
+ LLM extraction
230
+
231
+ [Claim objects]
232
+
233
+ Entity resolution
234
+
235
+ Graph ingestion (nodes + edges)
236
+ ```
237
+
238
+ ### PubMed Search Strategy
239
+
240
+ For each disease+year combination, search with neuroimaging focus:
241
+
242
+ ```
243
+ ({disease}[Title/Abstract])
244
+ AND ("brain imaging"[Title/Abstract] OR "neuroimaging"[Title/Abstract]
245
+ OR "MRI"[Title/Abstract] OR "fMRI"[Title/Abstract] OR "PET"[Title/Abstract])
246
+ AND {year}:{year}[pdat]
247
+ ```
248
+
249
+ Rate limit: wait 0.4s between NCBI API calls (≈2.5 req/sec, which stays under NCBI's limit of 3 req/sec without an API key).
250
+
251
+ ### LLM Extraction Prompt Design
252
+
253
+ See `scripts/extraction_prompt_template.txt` for the recommended prompt structure. Key fields to extract:
254
+
255
+ | Field | Description |
256
+ |-------|-------------|
257
+ | subject / object | Entity names |
258
+ | subject_type / object_type | Entity category (brain_region, disease, gene, ...) |
259
+ | predicate | Relationship type (reduces, increases, correlates_with, ...) |
260
+ | negated | Whether the claim states NO relationship |
261
+ | effect_metric / effect_size | Statistical effect (Cohen's d, r, OR, ...) |
262
+ | p_value | Statistical significance |
263
+ | sample_size | Study sample size |
264
+ | study_type | fMRI, PET, GWAS, meta_analysis, ... |
265
+ | conditions | List of contextual conditions |
266
+ | population | Study demographics |
267
+ | raw_sentence | Source sentence from abstract |
268
+
269
+ ### Entity Resolution During Ingestion
270
+
271
+ When a claim references "hippocampus" and the graph already has `NN:11` (preferred_name="Hippocampus"), the entity resolver matches them. If no match is found, a new concept node is created with prefix `CLM_CONCEPT:`.
272
+
273
+ This means the graph grows organically: atlas data provides the backbone, and claim extraction fills in relationships and discovers new entities.
274
+
275
+ ### Claim Node vs. Simplified Edge
276
+
277
+ Each claim generates **three** kinds of graph elements:
278
+
279
+ 1. **Claim node** (`CLM:abc123`): full metadata (evidence, conditions, population, raw text)
280
+ 2. **Simplified edge** (subject → object): for fast multi-hop traversal
281
+ 3. **About edges** (claim → subject, claim → object): for provenance queries
282
+
283
+ ## Output Files
284
+
285
+ | File | Description |
286
+ |------|-------------|
287
+ | `data/knowledge_graph.json` | Full graph (concepts + edges + metadata) |
288
+ | `data/papers_metadata.csv` | Paper records: pmid, doi, title, authors, year, journal, disease, abstract_length, n_claims, timestamp |
289
+ | `data/batch_checkpoint.json` | Resume checkpoint: completed_diseases, completed_years, totals |
290
+
291
+ ## Complementary / Related Skills
292
+
293
+ - `academic-research-hub` → paper search (arXiv, PubMed, Semantic Scholar)
294
+ - `research-idea` → consumes knowledge graph for hypothesis generation
295
+ - `method-design` → uses graph structure for method comparison
296
+
297
+ ## Reference
298
+
299
+ - MDKG paper: Gao et al., "Large language model powered knowledge graph construction for mental health exploration." Nature Communications (2025). PMID: 40804250
300
+ - NeuroNames: Brain region hierarchy
301
+ - MeSH: Medical Subject Headings (NLM)
302
+ - DisGeNET: Gene-disease association database
303
+ - Cognitive Atlas: Cognitive paradigm ontology
304
+ - Nilearn: Python brain atlas library
305
+
306
+ ## Phase 3: Hypothesis Engine
307
+
308
+ The hypothesis engine **batch-generates** hypotheses across the entire graph, **persists** them to JSON, and **ranks** by novelty, evidence, testability, and confidence. See `core/knowledge_graph/hypothesis_engine.py` for the implementation.
309
+
310
+ ### Workflow
311
+
312
+ ```
313
+ batch_generate() → save_hypotheses() → rank_hypotheses() → (Phase 5: convert to analysis tasks)
314
+ ```
315
+
316
+ ### Capabilities
317
+
318
+ | Function | Description |
319
+ |----------|-------------|
320
+ | `batch_generate()` | Traverse entire graph, generate hypotheses across all domain pairs |
321
+ | `save_hypotheses()` / `load_hypotheses()` | Persist to JSON for iterative re-ranking |
322
+ | `rank_hypotheses()` | Sort by composite score (4 dimensions) |
323
+ | `find_paths(src, tgt)` | Interactive: multi-hop path finding |
324
+ | `bridge_discovery(concept, domain)` | Interactive: cross-domain connection discovery |
325
+ | `discover_hypotheses(concept)` | Find hypotheses radiating from a single concept to all reachable domains |
326
+ | `find_trending(since_year, direction)` | Find concept pairs with strengthening/weakening evidence over time |
327
+ | `contradiction_detection()` | Find opposing claims on same concept pair |
328
+ | `gap_detection(domain_a, domain_b)` | Find 2-hop concept pairs with no direct edge |
329
+
330
+ ### Scoring (4 Dimensions)
331
+
332
+ Each hypothesis is scored on four dimensions:
333
+
334
+ | Dimension | Weight | What it measures |
335
+ |-----------|--------|------------------|
336
+ | **Confidence** | 0.25 | Edge confidence × study type quality × replicability |
337
+ | **Novelty** | 0.25 | Cross-domain paths, rare relations, few supporting papers |
338
+ | **Evidence** | 0.25 | p-value strength, sample size, effect size presence |
339
+ | **Testability** | 0.25 | Can NeuroClaw execute this? Modality detection (sMRI, EEG, fMRI, PET, DTI), brain region specificity |
340
+
341
+ Composite ranking: `confidence^0.25 * evidence^0.25 * novelty^0.25 * testability^0.25`
342
+
343
+ ### Default Domain Pairs
344
+
345
+ The batch generator explores these cross-domain pairs:
346
+ - neuroanatomy ↔ disease
347
+ - neuroanatomy ↔ cognitive_function
348
+ - disease ↔ gene
349
+ - disease ↔ drug
350
+ - disease ↔ biomarker
351
+ - gene ↔ disease
352
+ - drug ↔ disease
353
+ - cognitive_function ↔ disease
354
+ - neurotransmitter ↔ disease
355
+
356
+ ## Future Work
357
+
358
+ - **SapBERT entity alignment** with UMLS (cosine similarity > 0.9)
359
+ - **LLM-based hypothesis summarization** — use LLM to generate natural language hypothesis descriptions
360
+ - **Result feedback loop**: validated hypotheses write back to graph
361
+ - ~~**Temporal analysis**~~: implemented as `find_trending()` — tracks strengthening/weakening evidence trends across publication years
362
+
363
+ ---
364
+ Created At: 2026-05-04 20:28 HKT
365
+ Last Updated At: 2026-05-06 14:46 HKT
366
+ Author: chengwang96
@@ -0,0 +1,120 @@
1
+ """Reusable entity resolution for knowledge graph construction.
2
+
3
+ Resolves entity names to concept IDs using a 5-level cascading strategy:
4
+ 1. Exact match on preferred_name
5
+ 2. Case-insensitive match
6
+ 3. Alias match
7
+ 4. Substring match (prefer shortest name)
8
+ 5. Create new concept if no match
9
+
10
+ Usage:
11
+ from scripts.entity_resolution import EntityResolver
12
+ resolver = EntityResolver(kg)
13
+ concept_id = resolver.resolve("hippocampus", entity_type="brain_region")
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ from typing import Optional
20
+
21
+ from core.knowledge_graph.src.schema import ConceptNode, DomainTag
22
+ from core.knowledge_graph.src.graph_manager import KnowledgeGraph
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
# Map entity type strings to DomainTag values.
# The keys are the same nine type names listed under "Types:" in
# extraction_prompt_template.txt. Note that "protein" has no tag of its own
# here and shares DomainTag.GENE with "gene".
ENTITY_TYPE_TO_DOMAIN = {
    "brain_region": DomainTag.NEUROANATOMY,
    "disease": DomainTag.DISEASE,
    "gene": DomainTag.GENE,
    "neurotransmitter": DomainTag.NEUROTRANSMITTER,
    "protein": DomainTag.GENE,
    "drug": DomainTag.DRUG,
    "network": DomainTag.CONNECTIVITY,
    "biomarker": DomainTag.BIOMARKER,
    "cognitive_function": DomainTag.COGNITIVE_FUNCTION,
}
38
+
39
+
40
class EntityResolver:
    """Resolve entity names to concept IDs in a knowledge graph.

    Matching cascades through five levels, in priority order:

    1. exact match on ``preferred_name``
    2. case-insensitive match on ``preferred_name``
    3. case-insensitive match on an alias
    4. substring match in either direction (shortest ``preferred_name`` wins)
    5. creation of a new ``CLM_CONCEPT:`` node
    """

    def __init__(self, kg: KnowledgeGraph):
        # NOTE(review): resolve() reads the graph's private ``_index`` mapping
        # directly; a public accessor on KnowledgeGraph would be preferable.
        self.kg = kg

    def resolve(
        self,
        entity_name: str,
        entity_type: str = "",
        source_vocab: str = "entity_resolution",
    ) -> Optional[str]:
        """Resolve an entity name to a concept ID.

        Args:
            entity_name: The entity name to resolve. Surrounding whitespace
                is ignored.
            entity_type: Entity type string (maps to DomainTag); only used
                when a new concept has to be created.
            source_vocab: Source vocabulary recorded on a newly created
                concept.

        Returns:
            Concept ID string, or None if entity_name is empty or contains
            only whitespace.
        """
        name = entity_name.strip() if entity_name else ""
        if not name:
            return None
        name_lower = name.lower()

        # Best hit seen so far for each lower-priority level. The graph is
        # walked once; an exact match short-circuits, everything else is
        # decided after the loop so level priority is preserved.
        case_insensitive_id = None
        alias_id = None
        substring_hits = []

        for node in self.kg._index.values():
            preferred = node.preferred_name or ""

            # 1. Exact match
            if preferred == name:
                return node.id

            # 2. Case-insensitive match (first node in index order wins)
            preferred_lower = preferred.lower()
            if preferred_lower == name_lower:
                if case_insensitive_id is None:
                    case_insensitive_id = node.id
                continue

            # 3. Alias match. Empty aliases are dropped: they carry no
            # information and would otherwise match everything in step 4.
            aliases_lower = [a.lower() for a in (node.aliases or ()) if a]
            if name_lower in aliases_lower:
                if alias_id is None:
                    alias_id = node.id
                continue

            # 4. Substring match against the preferred name or any alias
            if self._contains_either_way(name_lower, preferred_lower) or any(
                self._contains_either_way(name_lower, alias)
                for alias in aliases_lower
            ):
                substring_hits.append(node)

        if case_insensitive_id is not None:
            return case_insensitive_id
        if alias_id is not None:
            return alias_id
        if substring_hits:
            # min() returns the first minimum, so ties keep index order.
            shortest = min(
                substring_hits, key=lambda n: len(n.preferred_name or "")
            )
            return shortest.id

        # 5. Not found — create new concept
        return self._create_new_concept(name, entity_type, source_vocab)

    @staticmethod
    def _contains_either_way(a: str, b: str) -> bool:
        """Return True when two non-empty strings contain one another.

        The emptiness guard matters: ``"" in s`` is True for every ``s``, so
        an empty name would otherwise count as a substring hit for any entity.
        """
        return bool(a) and bool(b) and (a in b or b in a)

    def _create_new_concept(
        self,
        name: str,
        entity_type: str,
        source_vocab: str,
    ) -> str:
        """Create a new concept node for an unresolved entity.

        The node ID is ``CLM_CONCEPT:`` followed by the name with spaces
        replaced by underscores. Returns the new ID.
        """
        # Normalise the type so e.g. "Brain_Region" or " gene " still map.
        type_key = (entity_type or "").strip().lower()
        # NOTE(review): unknown or empty types fall back to DomainTag.DISEASE,
        # which tags unrelated entities as diseases — confirm this default is
        # intended.
        domain = ENTITY_TYPE_TO_DOMAIN.get(type_key, DomainTag.DISEASE)
        new_id = f"CLM_CONCEPT:{name.replace(' ', '_')}"

        self.kg.add_concept(ConceptNode(
            id=new_id,
            preferred_name=name,
            domain_tags=[domain.value],
            source_vocab=source_vocab,
        ))
        logger.info("created new concept: %s (%s)", new_id, name)
        return new_id
@@ -0,0 +1,19 @@
1
+ Extract ALL scientific claims from this neuroscience paper abstract as JSON array.
2
+
3
+ Each claim object fields:
4
+ - subject, subject_type, predicate, object, object_type, negated
5
+ - effect_metric, effect_size, p_value, sample_size
6
+ - study_type, methodology, replicability, direction, raw_sentence
7
+ - conditions: list of conditions under which this claim holds (e.g. ["female only", "age > 65", "resting-state fMRI"]). Empty list [] if unconditional.
8
+ - population: object with study population info, null if not reported:
9
+ {{"mean_age": number or null, "age_range": "e.g. 18-65" or null, "n_female": int or null, "n_male": int or null, "ethnicity": str or null, "cohort_name": str or null}}
10
+
11
+ Types: brain_region, disease, gene, neurotransmitter, protein, drug, network, biomarker, cognitive_function
12
+ Predicates: reduces, increases, correlates_with, causes, is_biomarker_of, is_risk_factor_for, treats, modulates, activates, inhibits, predicts, mediates, is_associated_with, distinguishes
13
+ Study types: fMRI, PET, DTI, sMRI, EEG, MEG, lesion, meta_analysis, GWAS, animal_model, clinical_trial, case_control, longitudinal, cross_sectional, review
14
+
15
+ Title: {title}
16
+ PMID: {pmid}
17
+ Abstract: {abstract}
18
+
19
+ Return JSON array. Empty array [] if no claims.
@@ -0,0 +1,106 @@
1
+ """Quick graph query helpers for knowledge graph exploration.
2
+
3
+ Usage:
4
+ python scripts/graph_query.py stats
5
+ python scripts/graph_query.py search "hippocampus"
6
+ python scripts/graph_query.py neighbors "NN:11" --relation part_of
7
+ python scripts/graph_query.py paths "NN:11" "DOID:10652" --max-hops 3
8
+ python scripts/graph_query.py domain neuroanatomy
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ from core.knowledge_graph.src.storage import load_graph
19
+
20
+
21
def cmd_stats(kg):
    """Print the graph's ``stats()`` result as indented JSON."""
    print(json.dumps(kg.stats(), indent=2))
24
+
25
+
26
def cmd_search(kg, query, limit=20):
    """Print one line per concept returned by ``kg.search_by_name``.

    Each line shows the concept ID, its preferred name and its
    comma-separated domain tags.
    """
    for hit in kg.search_by_name(query, limit):
        joined_tags = ", ".join(hit.domain_tags)
        line = f" {hit.id:30s} {hit.preferred_name:30s} [{joined_tags}]"
        print(line)
31
+
32
+
33
def cmd_neighbors(kg, concept_id, relation=None, direction="out"):
    """Print the edges linking ``concept_id`` to its neighbors.

    ``relation`` and ``direction`` are forwarded to ``kg.get_neighbors``.
    A single notice is printed when nothing comes back.
    """
    found = kg.get_neighbors(concept_id, relation_type=relation, direction=direction)
    if found:
        # Each entry is a (neighbor_id, edge) pair; only the edge is shown.
        for _, link in found:
            print(f" {link.source_id} --[{link.relation_type}]--> {link.target_id} (conf={link.confidence:.2f})")
    else:
        print(f"No neighbors found for {concept_id}")
40
+
41
+
42
def cmd_paths(kg, source, target, max_hops=3):
    """Print every path ``kg.find_paths`` returns between two concepts.

    Each path is rendered as ``node[relation]`` hops joined by `` -> ``.
    """
    found = kg.find_paths(source, target, max_hops)
    if not found:
        print(f"No paths found between {source} and {target}")
        return

    print(f"Found {len(found)} path(s):")
    for number, route in enumerate(found, start=1):
        rendered = " -> ".join(f"{node}[{rel}]" for node, rel in route)
        print(f" Path {number}: {rendered}")
51
+
52
+
53
def cmd_domain(kg, domain_tag):
    """Print the concept count for a domain and list its first 20 concepts.

    When more than 20 concepts exist, a trailing line reports how many
    were left out.
    """
    members = kg.search_by_domain(domain_tag)
    total = len(members)
    print(f"Domain '{domain_tag}': {total} concepts")

    preview = members[:20]
    for concept in preview:
        print(f" {concept.id:30s} {concept.preferred_name}")

    if total > 20:
        print(f" ... and {total - 20} more")
60
+
61
+
62
def main():
    """Parse the command line and run the selected graph query.

    Subcommands: ``stats``, ``search``, ``neighbors``, ``paths``, ``domain``.
    The graph is read from ``--graph`` when given, otherwise from
    ``core/knowledge_graph/data/knowledge_graph.json``. Invoking the tool
    without a subcommand prints the help text and does not load the graph.
    """
    parser = argparse.ArgumentParser(description="Knowledge graph query tool")
    parser.add_argument("--graph", default=None, help="Path to graph JSON")
    sub = parser.add_subparsers(dest="command")

    sub.add_parser("stats")

    p_search = sub.add_parser("search")
    p_search.add_argument("query")
    p_search.add_argument("--limit", type=int, default=20)

    p_neighbors = sub.add_parser("neighbors")
    p_neighbors.add_argument("concept_id")
    p_neighbors.add_argument("--relation", default=None)
    p_neighbors.add_argument("--direction", default="out", choices=["out", "in", "both"])

    p_paths = sub.add_parser("paths")
    p_paths.add_argument("source")
    p_paths.add_argument("target")
    p_paths.add_argument("--max-hops", type=int, default=3)

    p_domain = sub.add_parser("domain")
    p_domain.add_argument("domain_tag")

    args = parser.parse_args()

    # Decide this before loading anything: with no subcommand there is nothing
    # to query, and loading first would turn a missing graph file into an
    # error where the user only needed the usage text.
    if args.command is None:
        parser.print_help()
        return

    graph_path = Path(args.graph) if args.graph else Path("core/knowledge_graph/data/knowledge_graph.json")
    kg = load_graph(graph_path)

    if args.command == "stats":
        cmd_stats(kg)
    elif args.command == "search":
        cmd_search(kg, args.query, args.limit)
    elif args.command == "neighbors":
        cmd_neighbors(kg, args.concept_id, args.relation, args.direction)
    elif args.command == "paths":
        cmd_paths(kg, args.source, args.target, args.max_hops)
    elif args.command == "domain":
        cmd_domain(kg, args.domain_tag)


if __name__ == "__main__":
    main()
@@ -0,0 +1,42 @@
1
+ """Hypothesis Engine CLI Reference - for agent use.
2
+
3
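+ The executable CLI lives in core/knowledge_graph/phase3.py (NOTE: SKILL.md invokes the same commands as core.knowledge_graph.hypothesis_cli — confirm which module name is current).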
4
+ This file documents the usage patterns for agent reference.
5
+
6
+ Usage (run from project root):
7
+
8
+ # Batch generate hypotheses across the entire graph
9
+ python -m core.knowledge_graph.phase3 batch --output data/hypotheses.json
10
+
11
+ # Load and re-rank saved hypotheses
12
+ python -m core.knowledge_graph.phase3 rank --input data/hypotheses.json --top 20
13
+
14
+ # Interactive queries
15
+ python -m core.knowledge_graph.phase3 paths "hippocampus" "Alzheimer Disease"
16
+ python -m core.knowledge_graph.phase3 bridge "hippocampus" --target-domain disease
17
+ python -m core.knowledge_graph.phase3 contradictions --domain disease
18
+ python -m core.knowledge_graph.phase3 gaps --domain-a neuroanatomy --domain-b disease
19
+ python -m core.knowledge_graph.phase3 explore "hippocampus"
20
+ python -m core.knowledge_graph.phase3 stats
21
+
22
+ Programmatic usage:
23
+ from core.knowledge_graph import load_graph, HypothesisEngine
24
+
25
+ kg = load_graph()
26
+ engine = HypothesisEngine(kg)
27
+
28
+ # batch generate
29
+ hypotheses = engine.batch_generate()
30
+ engine.save_hypotheses(hypotheses, "data/hypotheses.json")
31
+
32
+ # load and rank
33
+ hypotheses = engine.load_hypotheses("data/hypotheses.json")
34
+ ranked = engine.rank_hypotheses(hypotheses, top_n=50)
35
+
36
+ # each hypothesis has 4 scores:
37
+ # confidence_score - evidence quality
38
+ # novelty_score - how unexpected
39
+ # evidence_score - statistical strength
40
+ # testability_score - can NeuroClaw execute this?
41
+ # composite_score - combined ranking
42
+ """