medsci-skills 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (702)
  1. package/LICENSE +50 -0
  2. package/README.md +602 -0
  3. package/README_FIRST.md +27 -0
  4. package/bin/medsci-skills.js +159 -0
  5. package/installers/install-macos.command +19 -0
  6. package/installers/install-windows.cmd +26 -0
  7. package/installers/install-windows.ps1 +17 -0
  8. package/installers/install.py +218 -0
  9. package/metadata/skills_catalog.json +452 -0
  10. package/package.json +48 -0
  11. package/skills/academic-aio/SKILL.md +408 -0
  12. package/skills/academic-aio/references/case_studies/kjr_mllm_2025.md +82 -0
  13. package/skills/academic-aio/references/checklists/AIO_GENERAL.md +354 -0
  14. package/skills/academic-aio/references/journal_summarybox_templates.yaml +126 -0
  15. package/skills/academic-aio/references/oac_funding_checklist.yaml +129 -0
  16. package/skills/academic-aio/references/reporting_guideline_mapping.md +39 -0
  17. package/skills/academic-aio/references/schema_markup_templates/CodeRepository.jsonld +32 -0
  18. package/skills/academic-aio/references/schema_markup_templates/Dataset.jsonld +36 -0
  19. package/skills/academic-aio/references/schema_markup_templates/Person.jsonld +30 -0
  20. package/skills/academic-aio/references/schema_markup_templates/README.md +43 -0
  21. package/skills/academic-aio/references/schema_markup_templates/ScholarlyArticle.jsonld +55 -0
  22. package/skills/academic-aio/scripts/batch_metadata_audit.py +169 -0
  23. package/skills/academic-aio/scripts/validate_schema.py +118 -0
  24. package/skills/academic-aio/skill.yml +36 -0
  25. package/skills/academic-aio/templates/aio_audit_checklist.md.j2 +108 -0
  26. package/skills/add-journal/SKILL.md +482 -0
  27. package/skills/add-journal/skill.yml +33 -0
  28. package/skills/analyze-stats/SKILL.md +598 -0
  29. package/skills/analyze-stats/references/analysis_guides/missing_data.md +109 -0
  30. package/skills/analyze-stats/references/analysis_guides/nhis_icd10_mapping.md +247 -0
  31. package/skills/analyze-stats/references/analysis_guides/propensity_score.md +132 -0
  32. package/skills/analyze-stats/references/analysis_guides/regression.md +115 -0
  33. package/skills/analyze-stats/references/analysis_guides/repeated_measures.md +160 -0
  34. package/skills/analyze-stats/references/analysis_guides/survey_weighted.md +366 -0
  35. package/skills/analyze-stats/references/analysis_guides/test_selection.md +86 -0
  36. package/skills/analyze-stats/references/style/figure_style.mplstyle +69 -0
  37. package/skills/analyze-stats/references/style/theme_publication.R +147 -0
  38. package/skills/analyze-stats/references/table-standards/journal-profiles/ajr.yaml +51 -0
  39. package/skills/analyze-stats/references/table-standards/journal-profiles/european_radiology.yaml +55 -0
  40. package/skills/analyze-stats/references/table-standards/journal-profiles/jama.yaml +66 -0
  41. package/skills/analyze-stats/references/table-standards/journal-profiles/lancet.yaml +57 -0
  42. package/skills/analyze-stats/references/table-standards/journal-profiles/nejm.yaml +51 -0
  43. package/skills/analyze-stats/references/table-standards/journal-profiles/radiology.yaml +66 -0
  44. package/skills/analyze-stats/references/table-standards/table-standards.md +287 -0
  45. package/skills/analyze-stats/references/table-standards/table-types/diagnostic_accuracy.md +36 -0
  46. package/skills/analyze-stats/references/table-standards/table-types/meta_analysis.md +58 -0
  47. package/skills/analyze-stats/references/table-standards/table-types/model_comparison.md +36 -0
  48. package/skills/analyze-stats/references/table-standards/table-types/regression_results.md +50 -0
  49. package/skills/analyze-stats/references/table-standards/table-types/table1_demographics.md +51 -0
  50. package/skills/analyze-stats/references/table-standards/tool-comparison.md +79 -0
  51. package/skills/analyze-stats/references/templates/agreement_analysis.py +436 -0
  52. package/skills/analyze-stats/references/templates/dca_plot.R +237 -0
  53. package/skills/analyze-stats/references/templates/diagnostic_accuracy.py +401 -0
  54. package/skills/analyze-stats/references/templates/dta_meta_analysis.R +384 -0
  55. package/skills/analyze-stats/references/templates/forest_plot.py +412 -0
  56. package/skills/analyze-stats/references/templates/likert_summary.py +356 -0
  57. package/skills/analyze-stats/references/templates/meta_analysis.R +365 -0
  58. package/skills/analyze-stats/references/templates/propensity_score.py +478 -0
  59. package/skills/analyze-stats/references/templates/regression.py +425 -0
  60. package/skills/analyze-stats/references/templates/repeated_measures.py +434 -0
  61. package/skills/analyze-stats/references/templates/sample_size.R +382 -0
  62. package/skills/analyze-stats/references/templates/survey_weighted_analysis.py +411 -0
  63. package/skills/analyze-stats/references/templates/survival_analysis.py +325 -0
  64. package/skills/analyze-stats/references/templates/table1_demographics.py +287 -0
  65. package/skills/analyze-stats/scripts/check_generated_code.py +335 -0
  66. package/skills/analyze-stats/skill.yml +38 -0
  67. package/skills/analyze-stats/tests/fixtures/gen_bad.R +16 -0
  68. package/skills/analyze-stats/tests/fixtures/gen_bad.py +24 -0
  69. package/skills/analyze-stats/tests/fixtures/gen_clean.py +21 -0
  70. package/skills/analyze-stats/tests/test_generated_code.sh +59 -0
  71. package/skills/analyze-stats/tests/test_survival_template.sh +53 -0
  72. package/skills/author-strategy/SKILL.md +117 -0
  73. package/skills/author-strategy/analyze_patterns.py +303 -0
  74. package/skills/author-strategy/fetch_pubmed.py +374 -0
  75. package/skills/author-strategy/skill.yml +34 -0
  76. package/skills/batch-cohort/SKILL.md +223 -0
  77. package/skills/batch-cohort/references/base_template_knhanes.R +210 -0
  78. package/skills/batch-cohort/references/batch_template_generator.R +222 -0
  79. package/skills/batch-cohort/references/variable_coding_registry.md +136 -0
  80. package/skills/batch-cohort/skill.yml +35 -0
  81. package/skills/calc-sample-size/SKILL.md +491 -0
  82. package/skills/calc-sample-size/references/formulas.md +655 -0
  83. package/skills/calc-sample-size/references/observational_cohort.md +49 -0
  84. package/skills/calc-sample-size/skill.yml +51 -0
  85. package/skills/check-reporting/SKILL.md +534 -0
  86. package/skills/check-reporting/references/LICENSES.md +41 -0
  87. package/skills/check-reporting/references/checklists/AMSTAR2.md +54 -0
  88. package/skills/check-reporting/references/checklists/ARRIVE_2.md +234 -0
  89. package/skills/check-reporting/references/checklists/CARE.md +102 -0
  90. package/skills/check-reporting/references/checklists/CLAIM_2024.md +128 -0
  91. package/skills/check-reporting/references/checklists/CLEAR.md +113 -0
  92. package/skills/check-reporting/references/checklists/CONSORT.md +86 -0
  93. package/skills/check-reporting/references/checklists/COSMIN_RoB.md +136 -0
  94. package/skills/check-reporting/references/checklists/GRRAS.md +61 -0
  95. package/skills/check-reporting/references/checklists/MI_CLEAR_LLM.md +167 -0
  96. package/skills/check-reporting/references/checklists/MOOSE.md +85 -0
  97. package/skills/check-reporting/references/checklists/NOS.md +88 -0
  98. package/skills/check-reporting/references/checklists/PRISMA_2020.md +135 -0
  99. package/skills/check-reporting/references/checklists/PRISMA_DTA.md +36 -0
  100. package/skills/check-reporting/references/checklists/PRISMA_P.md +56 -0
  101. package/skills/check-reporting/references/checklists/PROBAST.md +75 -0
  102. package/skills/check-reporting/references/checklists/PROBAST_AI.md +130 -0
  103. package/skills/check-reporting/references/checklists/QUADAS2.md +77 -0
  104. package/skills/check-reporting/references/checklists/QUADAS_C.md +131 -0
  105. package/skills/check-reporting/references/checklists/ROBINS_E.md +179 -0
  106. package/skills/check-reporting/references/checklists/ROBINS_I.md +87 -0
  107. package/skills/check-reporting/references/checklists/ROBIS.md +114 -0
  108. package/skills/check-reporting/references/checklists/ROB_ME.md +126 -0
  109. package/skills/check-reporting/references/checklists/RoB2.md +79 -0
  110. package/skills/check-reporting/references/checklists/RoB_NMA.md +96 -0
  111. package/skills/check-reporting/references/checklists/SPIRIT.md +112 -0
  112. package/skills/check-reporting/references/checklists/SQUIRE_2.md +68 -0
  113. package/skills/check-reporting/references/checklists/STARD.md +129 -0
  114. package/skills/check-reporting/references/checklists/STARD_AI.md +211 -0
  115. package/skills/check-reporting/references/checklists/STROBE.md +80 -0
  116. package/skills/check-reporting/references/checklists/SWiM.md +33 -0
  117. package/skills/check-reporting/references/checklists/TRIPOD.md +157 -0
  118. package/skills/check-reporting/references/checklists/TRIPOD_AI.md +140 -0
  119. package/skills/check-reporting/references/step4c_registration_timing.md +93 -0
  120. package/skills/check-reporting/references/step4d_prisma_figure_audit.md +137 -0
  121. package/skills/check-reporting/scripts/check_checklist_exists.py +183 -0
  122. package/skills/check-reporting/scripts/check_checklist_version.py +168 -0
  123. package/skills/check-reporting/scripts/check_framework_naming.py +206 -0
  124. package/skills/check-reporting/scripts/check_prisma_figure.py +209 -0
  125. package/skills/check-reporting/scripts/prisma_cascade_check.py +274 -0
  126. package/skills/check-reporting/skill.yml +41 -0
  127. package/skills/check-reporting/tests/fixtures/framework_bad.md +8 -0
  128. package/skills/check-reporting/tests/fixtures/framework_clean.md +7 -0
  129. package/skills/check-reporting/tests/test_checklist_fail_fast.sh +77 -0
  130. package/skills/check-reporting/tests/test_checklist_version.sh +72 -0
  131. package/skills/check-reporting/tests/test_framework_naming.sh +45 -0
  132. package/skills/check-reporting/tests/test_prisma_cascade.sh +104 -0
  133. package/skills/clean-data/SKILL.md +180 -0
  134. package/skills/clean-data/references/cleaning_patterns.md +299 -0
  135. package/skills/clean-data/references/profiling_template.py +304 -0
  136. package/skills/clean-data/scripts/check_structural_zero.py +174 -0
  137. package/skills/clean-data/skill.yml +35 -0
  138. package/skills/clean-data/tests/fixtures/smoking.csv +8 -0
  139. package/skills/clean-data/tests/test_structural_zero.sh +49 -0
  140. package/skills/cross-national/SKILL.md +264 -0
  141. package/skills/cross-national/skill.yml +37 -0
  142. package/skills/define-variables/SKILL.md +146 -0
  143. package/skills/define-variables/references/common_definitions.md +190 -0
  144. package/skills/define-variables/skill.yml +34 -0
  145. package/skills/define-variables/templates/variable_operationalization.md +64 -0
  146. package/skills/deidentify/SKILL.md +203 -0
  147. package/skills/deidentify/deidentify.py +1224 -0
  148. package/skills/deidentify/locales/_template.json +45 -0
  149. package/skills/deidentify/locales/au.json +43 -0
  150. package/skills/deidentify/locales/ca.json +44 -0
  151. package/skills/deidentify/locales/cn.json +47 -0
  152. package/skills/deidentify/locales/de.json +48 -0
  153. package/skills/deidentify/locales/fr.json +48 -0
  154. package/skills/deidentify/locales/in.json +48 -0
  155. package/skills/deidentify/locales/jp.json +48 -0
  156. package/skills/deidentify/locales/kr.json +48 -0
  157. package/skills/deidentify/locales/uk.json +45 -0
  158. package/skills/deidentify/locales/us.json +43 -0
  159. package/skills/deidentify/references/date_shift_guide.md +82 -0
  160. package/skills/deidentify/references/hipaa_18_identifiers.md +48 -0
  161. package/skills/deidentify/references/korean_phi_patterns.md +135 -0
  162. package/skills/deidentify/skill.yml +43 -0
  163. package/skills/deidentify/tests/README.md +26 -0
  164. package/skills/deidentify/tests/test_clean.csv +16 -0
  165. package/skills/deidentify/tests/test_edge_cases.csv +11 -0
  166. package/skills/deidentify/tests/test_phi_korean.csv +11 -0
  167. package/skills/design-ai-benchmarking/SKILL.md +214 -0
  168. package/skills/design-ai-benchmarking/references/benchmark_export_schema.json +69 -0
  169. package/skills/design-ai-benchmarking/references/elicitation_rubric_template.md +37 -0
  170. package/skills/design-ai-benchmarking/skill.yml +38 -0
  171. package/skills/design-study/SKILL.md +298 -0
  172. package/skills/design-study/skill.yml +33 -0
  173. package/skills/fill-icmje-coi/SKILL.md +216 -0
  174. package/skills/fill-icmje-coi/scripts/fill_icmje_coi.py +140 -0
  175. package/skills/fill-icmje-coi/skill.yml +35 -0
  176. package/skills/fill-icmje-coi/templates/icmje_coi_seed_synthetic.docx +0 -0
  177. package/skills/fill-protocol/SKILL.md +248 -0
  178. package/skills/fill-protocol/examples/example_irb_template.yaml +53 -0
  179. package/skills/fill-protocol/references/best_practices.md +121 -0
  180. package/skills/fill-protocol/scripts/doc_to_docx.py +111 -0
  181. package/skills/fill-protocol/scripts/fill_form.py +611 -0
  182. package/skills/fill-protocol/scripts/inspect_template.py +61 -0
  183. package/skills/fill-protocol/setup.sh +162 -0
  184. package/skills/fill-protocol/skill.yml +37 -0
  185. package/skills/find-cohort-gap/SKILL.md +309 -0
  186. package/skills/find-cohort-gap/references/cohort_profile_template.md +93 -0
  187. package/skills/find-cohort-gap/references/onepager_template.md +84 -0
  188. package/skills/find-cohort-gap/references/pattern_scoring_rubric.md +169 -0
  189. package/skills/find-cohort-gap/references/saturation_query_templates.md +143 -0
  190. package/skills/find-cohort-gap/skill.yml +35 -0
  191. package/skills/find-journal/POLICY.md +87 -0
  192. package/skills/find-journal/SKILL.md +340 -0
  193. package/skills/find-journal/references/journal_profiles/AJNR.md +29 -0
  194. package/skills/find-journal/references/journal_profiles/AJR.md +30 -0
  195. package/skills/find-journal/references/journal_profiles/Abdominal_Radiology.md +30 -0
  196. package/skills/find-journal/references/journal_profiles/Academic_Radiology.md +30 -0
  197. package/skills/find-journal/references/journal_profiles/Annals_of_Internal_Medicine.md +33 -0
  198. package/skills/find-journal/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +28 -0
  199. package/skills/find-journal/references/journal_profiles/BMC_Medicine.md +31 -0
  200. package/skills/find-journal/references/journal_profiles/British_Journal_of_Radiology.md +39 -0
  201. package/skills/find-journal/references/journal_profiles/CVIR.md +30 -0
  202. package/skills/find-journal/references/journal_profiles/Chest.md +39 -0
  203. package/skills/find-journal/references/journal_profiles/Clinical_Radiology.md +30 -0
  204. package/skills/find-journal/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +32 -0
  205. package/skills/find-journal/references/journal_profiles/Diabetes_Metabolism_Journal.md +36 -0
  206. package/skills/find-journal/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +32 -0
  207. package/skills/find-journal/references/journal_profiles/Endocrinology_and_Metabolism.md +37 -0
  208. package/skills/find-journal/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +39 -0
  209. package/skills/find-journal/references/journal_profiles/European_Radiology.md +29 -0
  210. package/skills/find-journal/references/journal_profiles/Hepatology_Communications.md +40 -0
  211. package/skills/find-journal/references/journal_profiles/Hepatology_International.md +37 -0
  212. package/skills/find-journal/references/journal_profiles/IEEE_JBHI.md +28 -0
  213. package/skills/find-journal/references/journal_profiles/IEEE_TMI.md +28 -0
  214. package/skills/find-journal/references/journal_profiles/INSI.md +29 -0
  215. package/skills/find-journal/references/journal_profiles/Investigative_Radiology.md +25 -0
  216. package/skills/find-journal/references/journal_profiles/JACC_Advances.md +41 -0
  217. package/skills/find-journal/references/journal_profiles/JACC_Asia.md +30 -0
  218. package/skills/find-journal/references/journal_profiles/JACR.md +28 -0
  219. package/skills/find-journal/references/journal_profiles/JAMA.md +40 -0
  220. package/skills/find-journal/references/journal_profiles/JAMA_Network_Open.md +30 -0
  221. package/skills/find-journal/references/journal_profiles/JCSM.md +39 -0
  222. package/skills/find-journal/references/journal_profiles/JKMS.md +32 -0
  223. package/skills/find-journal/references/journal_profiles/JMIR.md +29 -0
  224. package/skills/find-journal/references/journal_profiles/JMIR_Medical_Education.md +29 -0
  225. package/skills/find-journal/references/journal_profiles/JNIS.md +35 -0
  226. package/skills/find-journal/references/journal_profiles/JVIR.md +31 -0
  227. package/skills/find-journal/references/journal_profiles/Journal_of_Biomedical_Informatics.md +29 -0
  228. package/skills/find-journal/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +40 -0
  229. package/skills/find-journal/references/journal_profiles/Journal_of_Magnetic_Resonance_Imaging.md +30 -0
  230. package/skills/find-journal/references/journal_profiles/Journal_of_Nuclear_Medicine.md +31 -0
  231. package/skills/find-journal/references/journal_profiles/Journal_of_Stroke.md +32 -0
  232. package/skills/find-journal/references/journal_profiles/KJR.md +38 -0
  233. package/skills/find-journal/references/journal_profiles/Korean_Circulation_Journal.md +38 -0
  234. package/skills/find-journal/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +36 -0
  235. package/skills/find-journal/references/journal_profiles/Lancet_Diabetes_and_Endocrinology.md +40 -0
  236. package/skills/find-journal/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +49 -0
  237. package/skills/find-journal/references/journal_profiles/Lancet_Infectious_Diseases.md +38 -0
  238. package/skills/find-journal/references/journal_profiles/Lancet_Neurology.md +39 -0
  239. package/skills/find-journal/references/journal_profiles/Lancet_Oncology.md +40 -0
  240. package/skills/find-journal/references/journal_profiles/Lancet_Psychiatry.md +38 -0
  241. package/skills/find-journal/references/journal_profiles/Lancet_Public_Health.md +30 -0
  242. package/skills/find-journal/references/journal_profiles/Lancet_Respiratory_Medicine.md +39 -0
  243. package/skills/find-journal/references/journal_profiles/Liver_International.md +33 -0
  244. package/skills/find-journal/references/journal_profiles/Medical_Image_Analysis.md +28 -0
  245. package/skills/find-journal/references/journal_profiles/NEJM.md +33 -0
  246. package/skills/find-journal/references/journal_profiles/Nature_Machine_Intelligence.md +31 -0
  247. package/skills/find-journal/references/journal_profiles/Nature_Medicine.md +39 -0
  248. package/skills/find-journal/references/journal_profiles/Neuroradiology.md +31 -0
  249. package/skills/find-journal/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +39 -0
  250. package/skills/find-journal/references/journal_profiles/PLOS_Medicine.md +32 -0
  251. package/skills/find-journal/references/journal_profiles/RYAI.md +28 -0
  252. package/skills/find-journal/references/journal_profiles/Radiology.md +29 -0
  253. package/skills/find-journal/references/journal_profiles/Skeletal_Radiology.md +31 -0
  254. package/skills/find-journal/references/journal_profiles/Stroke.md +37 -0
  255. package/skills/find-journal/references/journal_profiles/The_BMJ.md +31 -0
  256. package/skills/find-journal/references/journal_profiles/The_Lancet.md +31 -0
  257. package/skills/find-journal/references/journal_profiles/The_Lancet_Digital_Health.md +29 -0
  258. package/skills/find-journal/references/journal_profiles/World_Journal_of_Hepatology.md +53 -0
  259. package/skills/find-journal/references/journal_profiles/npj_Digital_Medicine.md +29 -0
  260. package/skills/find-journal/skill.yml +34 -0
  261. package/skills/fulltext-retrieval/SKILL.md +174 -0
  262. package/skills/fulltext-retrieval/fetch_oa.py +433 -0
  263. package/skills/fulltext-retrieval/pdf_to_md.py +160 -0
  264. package/skills/fulltext-retrieval/skill.yml +41 -0
  265. package/skills/generate-codebook/SKILL.md +155 -0
  266. package/skills/generate-codebook/references/codebook_schema.md +76 -0
  267. package/skills/generate-codebook/scripts/generate_codebook.py +278 -0
  268. package/skills/generate-codebook/skill.yml +35 -0
  269. package/skills/generate-codebook/tests/test_generate_codebook.sh +76 -0
  270. package/skills/grant-builder/SKILL.md +251 -0
  271. package/skills/grant-builder/skill.yml +34 -0
  272. package/skills/humanize/SKILL.md +251 -0
  273. package/skills/humanize/references/ai_patterns.md +571 -0
  274. package/skills/humanize/skill.yml +33 -0
  275. package/skills/intake-project/SKILL.md +264 -0
  276. package/skills/intake-project/skill.yml +34 -0
  277. package/skills/lit-sync/SKILL.md +448 -0
  278. package/skills/lit-sync/references/locale/ko/note_templates.md +110 -0
  279. package/skills/lit-sync/skill.yml +52 -0
  280. package/skills/lit-sync/tests/test_poll_logic.sh +92 -0
  281. package/skills/ma-scout/SKILL.md +640 -0
  282. package/skills/ma-scout/references/project_readme_template.md +95 -0
  283. package/skills/ma-scout/references/project_readme_template_ko.md +82 -0
  284. package/skills/ma-scout/skill.yml +33 -0
  285. package/skills/make-figures/SKILL.md +957 -0
  286. package/skills/make-figures/references/critic_rubrics/data_plot.md +166 -0
  287. package/skills/make-figures/references/critic_rubrics/flow_diagram.md +169 -0
  288. package/skills/make-figures/references/design_principles.md +181 -0
  289. package/skills/make-figures/references/exemplar_diagrams/README.md +65 -0
  290. package/skills/make-figures/references/exemplar_diagrams/consort/README.md +15 -0
  291. package/skills/make-figures/references/exemplar_diagrams/consort/template_input.yaml +37 -0
  292. package/skills/make-figures/references/exemplar_diagrams/consort/template_output.pdf +0 -0
  293. package/skills/make-figures/references/exemplar_diagrams/consort/template_output.png +0 -0
  294. package/skills/make-figures/references/exemplar_diagrams/consort/template_output_600.png +0 -0
  295. package/skills/make-figures/references/exemplar_diagrams/other/other_02.meta.yaml +4 -0
  296. package/skills/make-figures/references/exemplar_diagrams/other/other_02.png +0 -0
  297. package/skills/make-figures/references/exemplar_diagrams/other/other_02_why.md +13 -0
  298. package/skills/make-figures/references/exemplar_diagrams/pipeline/README.md +15 -0
  299. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.meta.yaml +4 -0
  300. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.png +0 -0
  301. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01_why.md +13 -0
  302. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.meta.yaml +4 -0
  303. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.png +0 -0
  304. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03_why.md +13 -0
  305. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.meta.yaml +4 -0
  306. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.png +0 -0
  307. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04_why.md +13 -0
  308. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.meta.yaml +4 -0
  309. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.png +0 -0
  310. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05_why.md +13 -0
  311. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.meta.yaml +4 -0
  312. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.png +0 -0
  313. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06_why.md +13 -0
  314. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.meta.yaml +4 -0
  315. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.png +0 -0
  316. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07_why.md +13 -0
  317. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.meta.yaml +4 -0
  318. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.png +0 -0
  319. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08_why.md +13 -0
  320. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.meta.yaml +4 -0
  321. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.png +0 -0
  322. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09_why.md +13 -0
  323. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.meta.yaml +4 -0
  324. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.png +0 -0
  325. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10_why.md +13 -0
  326. package/skills/make-figures/references/exemplar_diagrams/prisma/README.md +15 -0
  327. package/skills/make-figures/references/exemplar_diagrams/prisma/template_input.yaml +47 -0
  328. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.pdf +0 -0
  329. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.png +0 -0
  330. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output_600.png +0 -0
  331. package/skills/make-figures/references/exemplar_diagrams/stard/README.md +15 -0
  332. package/skills/make-figures/references/exemplar_diagrams/stard/template_input.yaml +40 -0
  333. package/skills/make-figures/references/exemplar_diagrams/stard/template_output.pdf +0 -0
  334. package/skills/make-figures/references/exemplar_diagrams/stard/template_output.png +0 -0
  335. package/skills/make-figures/references/exemplar_diagrams/stard/template_output_600.png +0 -0
  336. package/skills/make-figures/references/exemplar_diagrams/strobe/template_input.yaml +43 -0
  337. package/skills/make-figures/references/exemplar_diagrams/strobe/template_input_pptx.yaml +43 -0
  338. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pdf +0 -0
  339. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.png +0 -0
  340. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pptx +0 -0
  341. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output_600.png +0 -0
  342. package/skills/make-figures/references/figure_specs.md +291 -0
  343. package/skills/make-figures/references/flow_diagram_lessons.md +164 -0
  344. package/skills/make-figures/references/jacc_central_illustration_principles.md +91 -0
  345. package/skills/make-figures/references/medical_illustration_sources.md +98 -0
  346. package/skills/make-figures/references/pipeline_concepts_medical_ai.md +240 -0
  347. package/skills/make-figures/references/reporting_guideline_figure_map.md +104 -0
  348. package/skills/make-figures/references/visual_abstract_templates/european_radiology.pptx +0 -0
  349. package/skills/make-figures/references/visual_abstract_templates/jacc_central_illustration.pptx +0 -0
  350. package/skills/make-figures/references/visual_abstract_templates/medsci_default.pptx +0 -0
  351. package/skills/make-figures/references/visual_abstract_templates/template_guide.md +114 -0
  352. package/skills/make-figures/scripts/build_jacc_template.py +77 -0
  353. package/skills/make-figures/scripts/build_prisma2020_template.py +371 -0
  354. package/skills/make-figures/scripts/build_strobe_template.py +351 -0
  355. package/skills/make-figures/scripts/critic_figure.py +264 -0
  356. package/skills/make-figures/scripts/derive_figure_legend_counts.py +138 -0
  357. package/skills/make-figures/scripts/extract_exemplar_from_pdf.py +186 -0
  358. package/skills/make-figures/scripts/fetch_official_templates.sh +88 -0
  359. package/skills/make-figures/scripts/fill_prisma_template.py +142 -0
  360. package/skills/make-figures/scripts/generate_flow_diagram.R +133 -0
  361. package/skills/make-figures/scripts/generate_image.py +99 -0
  362. package/skills/make-figures/scripts/generate_visual_abstract.py +438 -0
  363. package/skills/make-figures/scripts/validate_pptx_mac_compat.py +233 -0
  364. package/skills/make-figures/skill.yml +52 -0
  365. package/skills/make-figures/templates/official/NOTES.md +62 -0
  366. package/skills/make-figures/templates/official/consort2010/CONSORT_2025_editable_checklist.docx +0 -0
  367. package/skills/make-figures/templates/official/consort2010/CONSORT_2025_flow_diagram.docx +0 -0
  368. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v1.pptx +0 -0
  369. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v2.pptx +0 -0
  370. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_updated_v2.pptx +0 -0
  371. package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_editable_checklist.docx +0 -0
  372. package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_participant_timeline.docx +0 -0
  373. package/skills/make-figures/templates/official/stard2015/STARD_2015_checklist.docx +0 -0
  374. package/skills/make-figures/templates/official/stard2015/STARD_2015_flow_diagram.pdf +0 -0
  375. package/skills/make-figures/tests/fixtures/figure1_flow.yaml +8 -0
  376. package/skills/make-figures/tests/fixtures/manuscript_ok.md +9 -0
  377. package/skills/make-figures/tests/fixtures/manuscript_stale.md +4 -0
  378. package/skills/make-figures/tests/test_legend_reconcile.sh +36 -0
  379. package/skills/manage-project/SKILL.md +358 -0
  380. package/skills/manage-project/references/pre_submission_checklist.md +53 -0
  381. package/skills/manage-project/references/project_state_template.json +37 -0
  382. package/skills/manage-project/references/scaffold_templates.md +118 -0
  383. package/skills/manage-project/references/status_output_format.md +44 -0
  384. package/skills/manage-project/references/timeline_example.md +20 -0
  385. package/skills/manage-project/skill.yml +36 -0
  386. package/skills/manage-project/templates/SSOT.yaml.template +41 -0
  387. package/skills/manage-refs/LICENSE.zotero-mcp +21 -0
  388. package/skills/manage-refs/NOTICE.md +29 -0
  389. package/skills/manage-refs/SKILL.md +289 -0
  390. package/skills/manage-refs/citation_styles/README.md +40 -0
  391. package/skills/manage-refs/citation_styles/american-journal-of-roentgenology.csl +211 -0
  392. package/skills/manage-refs/citation_styles/cardiovascular-and-interventional-radiology.csl +19 -0
  393. package/skills/manage-refs/citation_styles/european-radiology.csl +19 -0
  394. package/skills/manage-refs/citation_styles/journal-of-cachexia-sarcopenia-and-muscle.csl +150 -0
  395. package/skills/manage-refs/citation_styles/journal-of-korean-medical-science-strict.csl +533 -0
  396. package/skills/manage-refs/citation_styles/journal-of-korean-medical-science.csl +16 -0
  397. package/skills/manage-refs/citation_styles/korean-journal-of-radiology.csl +155 -0
  398. package/skills/manage-refs/citation_styles/nature.csl +189 -0
  399. package/skills/manage-refs/citation_styles/nlm-citation-sequence.csl +535 -0
  400. package/skills/manage-refs/citation_styles/radiology.csl +228 -0
  401. package/skills/manage-refs/citation_styles/springer-basic-brackets.csl +187 -0
  402. package/skills/manage-refs/citation_styles/springer-vancouver-brackets.csl +276 -0
  403. package/skills/manage-refs/citation_styles/vancouver-superscript.csl +536 -0
  404. package/skills/manage-refs/citation_styles/vancouver.csl +535 -0
  405. package/skills/manage-refs/references/REFERENCE_STYLE_SPECS.md +59 -0
  406. package/skills/manage-refs/references/check_xref_symptoms.md +35 -0
  407. package/skills/manage-refs/scripts/_vendor_citation_writer.py +600 -0
  408. package/skills/manage-refs/scripts/check_citation_keys.py +112 -0
  409. package/skills/manage-refs/scripts/check_csl_render.py +102 -0
  410. package/skills/manage-refs/scripts/check_xref.py +633 -0
  411. package/skills/manage-refs/scripts/fill_journal_abbrev.py +104 -0
  412. package/skills/manage-refs/scripts/inject_zotero_cwyw.py +133 -0
  413. package/skills/manage-refs/scripts/md_marker_convert.py +193 -0
  414. package/skills/manage-refs/scripts/pre_submission_gate.sh +238 -0
  415. package/skills/manage-refs/scripts/render_pandoc.sh +88 -0
  416. package/skills/manage-refs/skill.yml +70 -0
  417. package/skills/manage-refs/tests/fixtures/pre_submission_gate/README.md +32 -0
  418. package/skills/manage-refs/tests/fixtures/pre_submission_gate/manuscript.md +10 -0
  419. package/skills/manage-refs/tests/fixtures/pre_submission_gate/refs.bib +34 -0
  420. package/skills/manage-refs/tests/fixtures/pre_submission_gate/run.sh +117 -0
  421. package/skills/manage-refs/tests/test_vN_docx_check.sh +145 -0
  422. package/skills/meta-analysis/SKILL.md +739 -0
  423. package/skills/meta-analysis/references/LICENSES.md +21 -0
  424. package/skills/meta-analysis/references/PROSPERO_template.md +221 -0
  425. package/skills/meta-analysis/references/ai_pre_screening_template.py +245 -0
  426. package/skills/meta-analysis/references/checklists/JBI_Case_Series.md +45 -0
  427. package/skills/meta-analysis/references/checklists/NOS.md +88 -0
  428. package/skills/meta-analysis/references/checklists/PRISMA_DTA.md +36 -0
  429. package/skills/meta-analysis/references/checklists/PROBAST.md +75 -0
  430. package/skills/meta-analysis/references/checklists/QUADAS2.md +77 -0
  431. package/skills/meta-analysis/references/checklists/ROBINS_I.md +87 -0
  432. package/skills/meta-analysis/references/checklists/RoB2.md +79 -0
  433. package/skills/meta-analysis/references/data_integrity_checklist.md +57 -0
  434. package/skills/meta-analysis/references/icmje_coi_guide.md +181 -0
  435. package/skills/meta-analysis/references/phase10_recovery.md +136 -0
  436. package/skills/meta-analysis/references/phase4_km_composite.md +58 -0
  437. package/skills/meta-analysis/references/phase6_statistical_synthesis.md +148 -0
  438. package/skills/meta-analysis/references/phase9_circulation.md +84 -0
  439. package/skills/meta-analysis/references/post_submission_release_ops.md +41 -0
  440. package/skills/meta-analysis/references/r_templates.md +132 -0
  441. package/skills/meta-analysis/references/review_orchestration.md +40 -0
  442. package/skills/meta-analysis/references/submission_package_drift.md +71 -0
  443. package/skills/meta-analysis/scripts/check_pool_consistency.py +201 -0
  444. package/skills/meta-analysis/scripts/cohort_overlap_check.py +242 -0
  445. package/skills/meta-analysis/scripts/dta_extraction_qc.py +137 -0
  446. package/skills/meta-analysis/scripts/screening_reconcile.py +160 -0
  447. package/skills/meta-analysis/skill.yml +47 -0
  448. package/skills/meta-analysis/templates/FINAL_POOL_LOCK.yaml.template +70 -0
  449. package/skills/meta-analysis/templates/extraction_form_v2.md +129 -0
  450. package/skills/meta-analysis/templates/supplementary_8file_checklist.md +94 -0
  451. package/skills/meta-analysis/tests/test_pool_consistency.sh +123 -0
  452. package/skills/orchestrate/SKILL.md +501 -0
  453. package/skills/orchestrate/references/dialogue_nodes.md +196 -0
  454. package/skills/orchestrate/references/report_template.md +109 -0
  455. package/skills/orchestrate/references/report_template_ko.md +88 -0
  456. package/skills/orchestrate/skill.yml +44 -0
  457. package/skills/peer-review/SKILL.md +381 -0
  458. package/skills/peer-review/references/aczel_2021_reviewer2_patterns.md +88 -0
  459. package/skills/peer-review/references/domain-probes/ai_overclaiming.md +47 -0
  460. package/skills/peer-review/references/domain-probes/narrative_review.md +44 -0
  461. package/skills/peer-review/references/domain-probes/observational_confounding.md +48 -0
  462. package/skills/peer-review/references/domain-probes/radiomics.md +38 -0
  463. package/skills/peer-review/references/domain-probes/sr_ma.md +87 -0
  464. package/skills/peer-review/references/domain-probes/survival_prognostic.md +68 -0
  465. package/skills/peer-review/references/exemplar_reviews/README.md +43 -0
  466. package/skills/peer-review/references/exemplar_reviews/ai_overclaiming.md +47 -0
  467. package/skills/peer-review/references/exemplar_reviews/calibration_missing.md +44 -0
  468. package/skills/peer-review/references/exemplar_reviews/data_leakage.md +48 -0
  469. package/skills/peer-review/references/exemplar_reviews/reference_standard_validity.md +45 -0
  470. package/skills/peer-review/references/narrative_review_audit.md +67 -0
  471. package/skills/peer-review/references/reviewer_calibration/README.md +34 -0
  472. package/skills/peer-review/references/reviewer_calibration/compliance_floor.md +52 -0
  473. package/skills/peer-review/references/reviewer_profiles/AJR.md +82 -0
  474. package/skills/peer-review/references/reviewer_profiles/EURE.md +64 -0
  475. package/skills/peer-review/references/reviewer_profiles/INSI.md +57 -0
  476. package/skills/peer-review/references/reviewer_profiles/KJR.md +100 -0
  477. package/skills/peer-review/references/reviewer_profiles/README.md +32 -0
  478. package/skills/peer-review/references/reviewer_profiles/RYAI.md +86 -0
  479. package/skills/peer-review/skill.yml +39 -0
  480. package/skills/present-paper/SKILL.md +675 -0
  481. package/skills/present-paper/references/critic_rubrics/slide.md +155 -0
  482. package/skills/present-paper/references/generate_pptx_templates.py +604 -0
  483. package/skills/present-paper/references/medical_presentation_templates.md +277 -0
  484. package/skills/present-paper/references/slide_design_principles.md +202 -0
  485. package/skills/present-paper/references/slide_visual_styles/nature_lancet.md +168 -0
  486. package/skills/present-paper/references/workflow-checklist.md +109 -0
  487. package/skills/present-paper/scripts/extract_pdf_figures.py +243 -0
  488. package/skills/present-paper/scripts/inject_pronunciation_notes.py +178 -0
  489. package/skills/present-paper/scripts/inject_speaker_notes.py +133 -0
  490. package/skills/present-paper/scripts/strip_notes_for_sharing.py +140 -0
  491. package/skills/present-paper/scripts/trim_caption.py +271 -0
  492. package/skills/present-paper/skill.yml +41 -0
  493. package/skills/present-paper/templates/build_pptx_nature_lancet.py +688 -0
  494. package/skills/publish-skill/SKILL.md +370 -0
  495. package/skills/publish-skill/references/license-compatibility-matrix.md +132 -0
  496. package/skills/publish-skill/references/pii-patterns.md +130 -0
  497. package/skills/publish-skill/scripts/audit_skill.sh +278 -0
  498. package/skills/publish-skill/skill.yml +35 -0
  499. package/skills/render-pdf-doc/SKILL.md +146 -0
  500. package/skills/render-pdf-doc/references/known_pitfalls.md +53 -0
  501. package/skills/render-pdf-doc/references/pandoc_korean_cheatsheet.md +77 -0
  502. package/skills/render-pdf-doc/scripts/check_deps.sh +42 -0
  503. package/skills/render-pdf-doc/scripts/infer_colwidths.py +164 -0
  504. package/skills/render-pdf-doc/scripts/render_pdf.sh +98 -0
  505. package/skills/render-pdf-doc/skill.yml +57 -0
  506. package/skills/render-pdf-doc/templates/anchor-doc.md +27 -0
  507. package/skills/render-pdf-doc/templates/anchor-doc_ko.md +25 -0
  508. package/skills/render-pdf-doc/templates/briefing-handout.md +33 -0
  509. package/skills/render-pdf-doc/templates/briefing-handout_ko.md +31 -0
  510. package/skills/render-pdf-doc/templates/proposal-cover.md +33 -0
  511. package/skills/render-pdf-doc/templates/proposal-cover_ko.md +31 -0
  512. package/skills/render-pdf-doc/templates/reference-table.md +22 -0
  513. package/skills/render-pdf-doc/templates/reference-table_ko.md +20 -0
  514. package/skills/replicate-study/SKILL.md +150 -0
  515. package/skills/replicate-study/references/harmonization_3country.csv +47 -0
  516. package/skills/replicate-study/references/harmonization_knhanes_nhanes.csv +68 -0
  517. package/skills/replicate-study/references/methodology_extraction_template.md +134 -0
  518. package/skills/replicate-study/skill.yml +37 -0
  519. package/skills/review-paper/SKILL.md +104 -0
  520. package/skills/review-paper/references/macro_skeleton.md +6 -0
  521. package/skills/review-paper/skill.yml +25 -0
  522. package/skills/revise/SKILL.md +515 -0
  523. package/skills/revise/references/r2r_voice.md +346 -0
  524. package/skills/revise/skill.yml +43 -0
  525. package/skills/search-lit/SKILL.md +443 -0
  526. package/skills/search-lit/references/parse_pubmed.py +326 -0
  527. package/skills/search-lit/references/pubmed_eutils.sh +111 -0
  528. package/skills/search-lit/skill.yml +46 -0
  529. package/skills/self-review/SKILL.md +1045 -0
  530. package/skills/self-review/references/domain-probes/ai_overclaiming.md +47 -0
  531. package/skills/self-review/references/domain-probes/narrative_review.md +44 -0
  532. package/skills/self-review/references/domain-probes/observational_confounding.md +48 -0
  533. package/skills/self-review/references/domain-probes/radiomics.md +38 -0
  534. package/skills/self-review/references/domain-probes/sr_ma.md +87 -0
  535. package/skills/self-review/references/domain-probes/survival_prognostic.md +68 -0
  536. package/skills/self-review/references/exemplar_findings/README.md +43 -0
  537. package/skills/self-review/references/exemplar_findings/cohort_arithmetic_mismatch.md +35 -0
  538. package/skills/self-review/references/exemplar_findings/estimand_drift_posthoc_primary.md +39 -0
  539. package/skills/self-review/references/exemplar_findings/scope_overreach_cross_sectional.md +35 -0
  540. package/skills/self-review/references/exemplar_findings/unadjusted_confounder.md +36 -0
  541. package/skills/self-review/references/panel_review_template.md +177 -0
  542. package/skills/self-review/scripts/check_artifact_coverage.py +301 -0
  543. package/skills/self-review/scripts/check_claim_artifact.py +248 -0
  544. package/skills/self-review/scripts/check_classical_style.py +185 -0
  545. package/skills/self-review/scripts/check_cohort_arithmetic.py +481 -0
  546. package/skills/self-review/scripts/check_confounding_completeness.py +287 -0
  547. package/skills/self-review/scripts/check_panel_diversity.py +336 -0
  548. package/skills/self-review/scripts/check_reference_adequacy.py +392 -0
  549. package/skills/self-review/scripts/check_reviewer_team_consistency.py +412 -0
  550. package/skills/self-review/scripts/check_scope_coherence.py +177 -0
  551. package/skills/self-review/skill.yml +47 -0
  552. package/skills/self-review/tests/fixtures/claim_manuscript.md +17 -0
  553. package/skills/self-review/tests/fixtures/claim_prereg.md +6 -0
  554. package/skills/self-review/tests/fixtures/cohort_bad.md +21 -0
  555. package/skills/self-review/tests/fixtures/cohort_clean.md +21 -0
  556. package/skills/self-review/tests/fixtures/cohort_partition.csv +5 -0
  557. package/skills/self-review/tests/fixtures/coverage_analysis/31_delong_nested_added_value.csv +3 -0
  558. package/skills/self-review/tests/fixtures/coverage_analysis/table1_demographics.csv +3 -0
  559. package/skills/self-review/tests/fixtures/coverage_clean.md +13 -0
  560. package/skills/self-review/tests/fixtures/coverage_manuscript.md +11 -0
  561. package/skills/self-review/tests/fixtures/panel_collapse.json +27 -0
  562. package/skills/self-review/tests/fixtures/panel_good.json +32 -0
  563. package/skills/self-review/tests/fixtures/panel_monoculture.json +32 -0
  564. package/skills/self-review/tests/fixtures/refadeq_letter.md +13 -0
  565. package/skills/self-review/tests/fixtures/refadeq_original_fixed.md +42 -0
  566. package/skills/self-review/tests/fixtures/refadeq_original_uncited.md +40 -0
  567. package/skills/self-review/tests/fixtures/scope_bad.md +9 -0
  568. package/skills/self-review/tests/fixtures/scope_clean.md +8 -0
  569. package/skills/self-review/tests/fixtures/scope_surrogate.md +8 -0
  570. package/skills/self-review/tests/fixtures/style_bad.md +13 -0
  571. package/skills/self-review/tests/fixtures/style_clean.md +11 -0
  572. package/skills/self-review/tests/fixtures/table1_by_exposure.csv +11 -0
  573. package/skills/self-review/tests/test_artifact_coverage.sh +44 -0
  574. package/skills/self-review/tests/test_claim_artifact.sh +50 -0
  575. package/skills/self-review/tests/test_classical_style.sh +44 -0
  576. package/skills/self-review/tests/test_cohort_arithmetic.sh +49 -0
  577. package/skills/self-review/tests/test_confounding_completeness.sh +66 -0
  578. package/skills/self-review/tests/test_panel_diversity.sh +55 -0
  579. package/skills/self-review/tests/test_panel_mode.sh +69 -0
  580. package/skills/self-review/tests/test_reference_adequacy.sh +68 -0
  581. package/skills/self-review/tests/test_reviewer_team_consistency.sh +138 -0
  582. package/skills/self-review/tests/test_scope_coherence.sh +46 -0
  583. package/skills/setup-medsci/SKILL.md +110 -0
  584. package/skills/setup-medsci/references/setup-checklist.md +51 -0
  585. package/skills/setup-medsci/skill.yml +30 -0
  586. package/skills/sync-submission/SKILL.md +382 -0
  587. package/skills/sync-submission/scripts/author_registry_example.yaml +36 -0
  588. package/skills/sync-submission/scripts/blind_sweep.py +203 -0
  589. package/skills/sync-submission/scripts/check_asset_anonymization.py +300 -0
  590. package/skills/sync-submission/scripts/check_cross_artifact_stale.py +211 -0
  591. package/skills/sync-submission/scripts/cover_letter_drift_check.py +451 -0
  592. package/skills/sync-submission/scripts/cross_document_n_check.py +486 -0
  593. package/skills/sync-submission/scripts/detect_copy_divergence.py +136 -0
  594. package/skills/sync-submission/scripts/preflight_gate.py +458 -0
  595. package/skills/sync-submission/scripts/scope_drift_check.py +362 -0
  596. package/skills/sync-submission/scripts/sync_submission.py +169 -0
  597. package/skills/sync-submission/skill.yml +43 -0
  598. package/skills/sync-submission/tests/fixtures/copy_ok.md +5 -0
  599. package/skills/sync-submission/tests/fixtures/copy_stale.md +5 -0
  600. package/skills/sync-submission/tests/fixtures/ssot.md +5 -0
  601. package/skills/sync-submission/tests/test_asset_anonymization.sh +99 -0
  602. package/skills/sync-submission/tests/test_copy_divergence.sh +44 -0
  603. package/skills/sync-submission/tests/test_cross_artifact_stale.sh +80 -0
  604. package/skills/sync-submission/tests/test_cross_document_n.sh +132 -0
  605. package/skills/sync-submission/tests/test_preflight_gate.sh +112 -0
  606. package/skills/sync-submission/tests/test_scope_drift.sh +122 -0
  607. package/skills/sync-submission/tests/test_vN_docx_assertion.sh +51 -0
  608. package/skills/verify-refs/SKILL.md +177 -0
  609. package/skills/verify-refs/references/manual_checkpoint_guide.md +100 -0
  610. package/skills/verify-refs/scripts/verify_cli.sh +62 -0
  611. package/skills/verify-refs/scripts/verify_refs.py +782 -0
  612. package/skills/verify-refs/skill.yml +44 -0
  613. package/skills/verify-refs/tests/fixtures/pagination_placeholder.bib +17 -0
  614. package/skills/verify-refs/tests/test_pagination_placeholder.sh +42 -0
  615. package/skills/version-dataset/SKILL.md +143 -0
  616. package/skills/version-dataset/references/manifest_schema.md +72 -0
  617. package/skills/version-dataset/scripts/version_dataset.py +242 -0
  618. package/skills/version-dataset/skill.yml +35 -0
  619. package/skills/version-dataset/tests/test_version_dataset.sh +52 -0
  620. package/skills/write-paper/SKILL.md +1148 -0
  621. package/skills/write-paper/references/exemplar_methods/README.md +38 -0
  622. package/skills/write-paper/references/exemplar_methods/ai_validation_tripod_claim.md +47 -0
  623. package/skills/write-paper/references/exemplar_methods/diagnostic_accuracy_stard.md +50 -0
  624. package/skills/write-paper/references/exemplar_methods/observational_cohort_strobe.md +43 -0
  625. package/skills/write-paper/references/journal_profiles/AJNR.md +185 -0
  626. package/skills/write-paper/references/journal_profiles/AJR.md +149 -0
  627. package/skills/write-paper/references/journal_profiles/Abdominal_Radiology.md +139 -0
  628. package/skills/write-paper/references/journal_profiles/Academic_Radiology.md +90 -0
  629. package/skills/write-paper/references/journal_profiles/Annals_of_Internal_Medicine.md +150 -0
  630. package/skills/write-paper/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +82 -0
  631. package/skills/write-paper/references/journal_profiles/British_Journal_of_Radiology.md +161 -0
  632. package/skills/write-paper/references/journal_profiles/CVIR.md +157 -0
  633. package/skills/write-paper/references/journal_profiles/Chest.md +270 -0
  634. package/skills/write-paper/references/journal_profiles/Clinical_Radiology.md +160 -0
  635. package/skills/write-paper/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +147 -0
  636. package/skills/write-paper/references/journal_profiles/Diabetes_Metabolism_Journal.md +163 -0
  637. package/skills/write-paper/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +216 -0
  638. package/skills/write-paper/references/journal_profiles/Endocrinology_and_Metabolism.md +167 -0
  639. package/skills/write-paper/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +192 -0
  640. package/skills/write-paper/references/journal_profiles/European_Radiology.md +159 -0
  641. package/skills/write-paper/references/journal_profiles/Hepatology_Communications.md +110 -0
  642. package/skills/write-paper/references/journal_profiles/Hepatology_International.md +106 -0
  643. package/skills/write-paper/references/journal_profiles/IEEE_TMI.md +180 -0
  644. package/skills/write-paper/references/journal_profiles/INSI.md +163 -0
  645. package/skills/write-paper/references/journal_profiles/Investigative_Radiology.md +86 -0
  646. package/skills/write-paper/references/journal_profiles/JACC_Advances.md +197 -0
  647. package/skills/write-paper/references/journal_profiles/JACC_Asia.md +168 -0
  648. package/skills/write-paper/references/journal_profiles/JACR.md +87 -0
  649. package/skills/write-paper/references/journal_profiles/JAMA.md +188 -0
  650. package/skills/write-paper/references/journal_profiles/JAMA_Network_Open.md +170 -0
  651. package/skills/write-paper/references/journal_profiles/JCSM.md +266 -0
  652. package/skills/write-paper/references/journal_profiles/JKMS.md +201 -0
  653. package/skills/write-paper/references/journal_profiles/JMIR.md +88 -0
  654. package/skills/write-paper/references/journal_profiles/JMIR_Medical_Education.md +86 -0
  655. package/skills/write-paper/references/journal_profiles/JNIS.md +227 -0
  656. package/skills/write-paper/references/journal_profiles/JVIR.md +158 -0
  657. package/skills/write-paper/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +191 -0
  658. package/skills/write-paper/references/journal_profiles/Journal_of_Stroke.md +176 -0
  659. package/skills/write-paper/references/journal_profiles/KJR.md +185 -0
  660. package/skills/write-paper/references/journal_profiles/Korean_Circulation_Journal.md +184 -0
  661. package/skills/write-paper/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +178 -0
  662. package/skills/write-paper/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +127 -0
  663. package/skills/write-paper/references/journal_profiles/Liver_International.md +165 -0
  664. package/skills/write-paper/references/journal_profiles/Medical_Image_Analysis.md +147 -0
  665. package/skills/write-paper/references/journal_profiles/NEJM.md +147 -0
  666. package/skills/write-paper/references/journal_profiles/Nature_Medicine.md +181 -0
  667. package/skills/write-paper/references/journal_profiles/Neuroradiology.md +151 -0
  668. package/skills/write-paper/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +184 -0
  669. package/skills/write-paper/references/journal_profiles/PLOS_Medicine.md +166 -0
  670. package/skills/write-paper/references/journal_profiles/RYAI.md +124 -0
  671. package/skills/write-paper/references/journal_profiles/Radiology.md +173 -0
  672. package/skills/write-paper/references/journal_profiles/Skeletal_Radiology.md +135 -0
  673. package/skills/write-paper/references/journal_profiles/Stroke.md +210 -0
  674. package/skills/write-paper/references/journal_profiles/The_BMJ.md +121 -0
  675. package/skills/write-paper/references/journal_profiles/The_Lancet.md +112 -0
  676. package/skills/write-paper/references/journal_profiles/The_Lancet_Digital_Health.md +104 -0
  677. package/skills/write-paper/references/journal_profiles/World_Journal_of_Hepatology.md +106 -0
  678. package/skills/write-paper/references/journal_profiles/npj_Digital_Medicine.md +93 -0
  679. package/skills/write-paper/references/paper_types/ai_validation.md +270 -0
  680. package/skills/write-paper/references/paper_types/animal_study.md +194 -0
  681. package/skills/write-paper/references/paper_types/case_report.md +237 -0
  682. package/skills/write-paper/references/paper_types/cross_national.md +328 -0
  683. package/skills/write-paper/references/paper_types/letter.md +127 -0
  684. package/skills/write-paper/references/paper_types/meta_analysis.md +181 -0
  685. package/skills/write-paper/references/paper_types/nhis_cohort.md +297 -0
  686. package/skills/write-paper/references/paper_types/original_article.md +221 -0
  687. package/skills/write-paper/references/paper_types/technical_note.md +131 -0
  688. package/skills/write-paper/references/section_guides/discussion.md +155 -0
  689. package/skills/write-paper/references/section_guides/introduction.md +108 -0
  690. package/skills/write-paper/references/section_guides/methods.md +144 -0
  691. package/skills/write-paper/references/section_guides/results.md +113 -0
  692. package/skills/write-paper/references/section_guides/step7_1_classical_qc.md +67 -0
  693. package/skills/write-paper/references/section_guides/step7_4a_audit_recovery.md +74 -0
  694. package/skills/write-paper/references/section_guides/title_abstract.md +123 -0
  695. package/skills/write-paper/references/section_templates/methods_statistical.md +147 -0
  696. package/skills/write-paper/scripts/check_placeholders.py +182 -0
  697. package/skills/write-paper/skill.yml +48 -0
  698. package/skills/write-paper/tests/test_placeholders.sh +107 -0
  699. package/skills/write-protocol/SKILL.md +243 -0
  700. package/skills/write-protocol/references/ethics_checklist.md +150 -0
  701. package/skills/write-protocol/references/protocol_template.md +304 -0
  702. package/skills/write-protocol/skill.yml +34 -0
@@ -0,0 +1,299 @@
1
+ # Common Clinical Data Cleaning Patterns
2
+
3
+ Reference document for the clean-data skill. Covers recurring data quality issues
4
+ in electronic health records, registries, and research databases.
5
+
6
+ ---
7
+
8
+ ## 1. Missing Data Patterns
9
+
10
+ ### Classification
11
+
12
+ - **MCAR (Missing Completely At Random)**: Missingness is unrelated to any variable.
13
+ Example: random equipment failure during lab measurement.
14
+ Test: Little's MCAR test (chi-square). If p > 0.05, MCAR is not rejected (plausible, though a non-significant result does not prove it).
15
+
16
+ - **MAR (Missing At Random)**: Missingness depends on observed variables but not the
17
+ missing value itself. Example: younger patients less likely to have bone density measured.
18
+ Cannot be directly tested; inferred from associations between missingness indicators
19
+ and observed covariates.
20
+
21
+ - **MNAR (Missing Not At Random)**: Missingness depends on the unobserved value itself.
22
+ Example: severely ill patients too sick to complete follow-up surveys.
23
+ Cannot be tested from the data alone; requires domain knowledge.
24
+
25
+ ### Heuristic Assessment
26
+
27
+ 1. Compute missing percentage per variable.
28
+ 2. Create missingness indicator (0/1) for each variable with >5% missing.
29
+ 3. Correlate missingness indicators with observed variables (chi-square, t-test).
30
+ 4. If strong correlations exist: likely MAR. If none: plausible MCAR. If clinical
31
+ reasoning suggests the value itself drives missingness: suspect MNAR.
32
+
33
+ ### When to Use Each Imputation Method
34
+
35
+ | Method | When appropriate | Caution |
36
+ |--------|-----------------|---------|
37
+ | Listwise deletion (complete case) | MCAR, low % missing (<5%), large sample | Biased if MAR/MNAR; reduces power |
38
+ | Mean/median imputation | Quick exploratory analysis only | Underestimates variance; distorts distributions |
39
+ | Last observation carried forward | Longitudinal data, slow-changing variables | Biased if trajectory is changing |
40
+ | Multiple imputation (MICE) | MAR, moderate missing (5-40%), multivariate | Requires careful model specification |
41
+ | Maximum likelihood (FIML) | MAR, SEM or regression contexts | Needs software support |
42
+ | Sensitivity analysis | Always for MNAR suspicion | Report results under multiple assumptions |
43
+
44
+ ### Key Rule
45
+
46
+ Never impute the outcome variable in the primary analysis without explicit justification.
47
+ Report the missing data mechanism assumption in the methods section.
48
+
49
+ ---
50
+
51
+ ## 2. Outlier Detection
52
+
53
+ ### Statistical Methods
54
+
55
+ **IQR Method (Tukey Fences)**:
56
+ - Lower fence: Q1 - 1.5 * IQR
57
+ - Upper fence: Q3 + 1.5 * IQR
58
+ - Robust to non-normal distributions
59
+ - Preferred for clinical data where normality is rarely guaranteed
60
+
61
+ **Z-Score Method**:
62
+ - Flag values with |z| > 3 (or |z| > 2.5 for smaller samples)
63
+ - Assumes approximate normality
64
+ - Sensitive to the outliers themselves (mean and SD are affected)
65
+
66
+ **Modified Z-Score (MAD-based)**:
67
+ - Uses median and Median Absolute Deviation instead of mean/SD
68
+ - More robust than standard Z-score
69
+ - Formula: M_i = 0.6745 * (x_i - median) / MAD, where MAD = median(|x_i - median|); a common rule flags |M_i| > 3.5
70
+
71
+ ### Decision Framework
72
+
73
+ | Scenario | Recommended action |
74
+ |----------|--------------------|
75
+ | Data entry error (clearly impossible) | Correct if source available; else set to missing |
76
+ | Measurement error (instrument fault) | Set to missing; document in cleaning log |
77
+ | True extreme value (biologically plausible) | Keep in dataset; consider sensitivity analysis with/without |
78
+ | Ambiguous | Flag for domain expert review; do not remove without justification |
79
+
80
+ ### Clinical Context Matters
81
+
82
+ A BMI of 50 is an outlier statistically but clinically real. An age of 200 is impossible.
83
+ A creatinine of 15 mg/dL is extreme but occurs in dialysis patients. Always consult the
84
+ codebook and clinical context before removing outliers.
85
+
86
+ ---
87
+
88
+ ## 3. Duplicate Detection
89
+
90
+ ### Exact Duplicates
91
+
92
+ - Identical across ALL columns.
93
+ - Usually safe to remove (keep first occurrence).
94
+ - Common cause: accidental double-submission or ETL errors.
95
+
96
+ ### Near-Duplicates
97
+
98
+ - Same patient identifier, different records.
99
+ - May be legitimate (multiple visits) or errors (same visit entered twice with typos).
100
+
101
+ ### Detection Strategy
102
+
103
+ 1. Check for exact row duplicates: `df.duplicated().sum()`
104
+ 2. Check for duplicate patient IDs: `df['patient_id'].duplicated().sum()`
105
+ 3. For near-duplicates: group by patient ID, sort by date, check for records within
106
+ a suspiciously short time window (e.g., same day for what should be annual visits).
107
+ 4. Fuzzy matching: consider Levenshtein distance on name fields if no unique ID exists.
108
+
109
+ ### Resolution
110
+
111
+ - Exact duplicates: drop duplicates, log count.
112
+ - Same-patient near-duplicates: present to researcher for manual review.
113
+ - Never auto-merge patient records without explicit approval.
114
+
115
+ ---
116
+
117
+ ## 4. Date Handling
118
+
119
+ ### Common Date Formats in Clinical Data
120
+
121
+ | Format | Example | Source |
122
+ |--------|---------|--------|
123
+ | YYYY-MM-DD | 2024-03-15 | ISO 8601, most databases |
124
+ | MM/DD/YYYY | 03/15/2024 | US clinical systems |
125
+ | DD/MM/YYYY | 15/03/2024 | European systems |
126
+ | YYYYMMDD | 20240315 | DICOM, HL7 |
127
+ | DD-Mon-YYYY | 15-Mar-2024 | Some EMR exports |
128
+ | Excel serial | 45366 | Excel numeric date |
129
+
130
+ ### Common Issues
131
+
132
+ - **Ambiguous dates**: Is 03/04/2024 March 4th or April 3rd? Check the data source locale.
133
+ Look for values >12 in the first or second position to disambiguate.
134
+ - **Impossible dates**: February 30, month 13, year 0001.
135
+ - **Future dates**: Dates after the data extraction date (except for scheduled appointments).
136
+ - **Timezone issues**: Rarely relevant for clinical research dates, but critical for timestamps
137
+ in multi-site studies across time zones.
138
+ - **Two-digit years**: 24 could be 1924 or 2024. Use a pivot year (e.g., 30: <=30 means 2000s,
139
+ >30 means 1900s) or infer from context.
140
+
141
+ ### Standardization
142
+
143
+ 1. Parse all date columns to datetime using `pd.to_datetime(col, format=..., errors='coerce')`.
144
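+ 2. Check for NaT (failed parses) and investigate: `errors='coerce'` silently converts unparseable entries to NaT, so compare the NaT count with the number of originally missing values.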
145
+ 3. Standardize to ISO 8601 (YYYY-MM-DD) for storage.
146
+ 4. Calculate derived variables (age at event, follow-up duration) from standardized dates.
147
+
148
+ ---
149
+
150
+ ## 5. Category Harmonization
151
+
152
+ ### Common Inconsistencies
153
+
154
+ | Raw values | Harmonized |
155
+ |-----------|-----------|
156
+ | "Male", "male", "M", "MALE", " Male " | "Male" |
157
+ | "Y", "Yes", "yes", "YES", "1", "True" | 1 or "Yes" |
158
+ | "Right", "Rt", "R", "right", "RT" | "Right" |
159
+ | "Non-small cell", "NSCLC", "non small cell" | "NSCLC" |
160
+
161
+ ### Harmonization Steps
162
+
163
+ 1. Strip whitespace: `series.str.strip()`
164
+ 2. Normalize case: `series.str.lower()` or `series.str.title()`
165
+ 3. Build a mapping dictionary for known synonyms.
166
+ 4. Review unmapped values manually.
167
+ 5. Apply mapping: `series.map(mapping_dict).fillna(series)` (the `fillna` keeps unmapped values unchanged instead of turning them into NaN)
168
+
169
+ ### Encoding Standards
170
+
171
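+ - **ICD-10**: Diagnosis codes. Watch for variant differences (WHO ICD-10 vs ICD-10-CM vs other national modifications); ICD-10-PCS is a separate US inpatient procedure coding system, not a diagnosis code set.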
172
+ - **SNOMED CT**: Clinical terminology. More granular than ICD-10.
173
+ - **LOINC**: Laboratory observations. Use for standardizing lab test names.
174
+ - **CPT/HCPCS**: Procedure codes.
175
+
176
+ When possible, map free-text categories to standard coding systems. Document the mapping
177
+ table and include it in supplementary materials.
178
+
179
+ ---
180
+
181
+ ## 6. Common Clinical Data Pitfalls
182
+
183
+ ### Lab Values with Inequality Prefixes
184
+
185
+ Values like "<0.01", ">10000", "<=5" are common for lab results at detection limits.
186
+
187
+ **Handling options**:
188
+ - Replace with the limit value: "<0.01" -> 0.01 (simplest option; treats the limit as the measured value, so it overestimates results reported as "<")
189
+ - Replace with half the limit: "<0.01" -> 0.005 (common in environmental studies)
190
+ - Replace with limit / sqrt(2): "<0.01" -> 0.00707 (EPA method)
191
+ - Keep as censored data and use appropriate statistical methods (Tobit regression)
192
+
193
+ Document the chosen method in the statistical analysis plan.
194
+
195
+ ### Mixed Units
196
+
197
+ Common in multi-site studies or data merged from different systems.
198
+
199
+ | Analyte | Unit A | Unit B | Conversion |
200
+ |---------|--------|--------|-----------|
201
+ | Glucose | mg/dL | mmol/L | mg/dL = mmol/L * 18.018 |
202
+ | Creatinine | mg/dL | umol/L | mg/dL = umol/L / 88.4 |
203
+ | Hemoglobin | g/dL | g/L | g/dL = g/L / 10 |
204
+ | Calcium | mg/dL | mmol/L | mg/dL = mmol/L * 4.008 |
205
+
206
+ **Detection**: look for bimodal distributions in lab values -- one mode per unit system.
207
+
208
+ ### Sentinel Values
209
+
210
+ Values used as placeholders for missing data in legacy systems:
211
+
212
+ | Sentinel | Meaning |
213
+ |----------|---------|
214
+ | 999, 9999, 99999 | Missing / not recorded |
215
+ | -1, -9, -99 | Missing / not applicable |
216
+ | 0 | Could be true zero OR missing -- context-dependent |
217
+ | 88, 77 | "Not applicable" or "Refused" in survey data |
218
+ | 8888 | "Not applicable" or "Missing" in health screening/institutional databases |
219
+ | 01/01/1900 | Default/missing date |
220
+
221
+ **Action**: Replace sentinel values with `NaN` BEFORE computing any statistics.
222
+ Document which values were treated as sentinel.
223
+
224
+ ### Excel Date Corruption
225
+
226
+ Excel auto-converts certain strings to dates:
227
+ - Gene names: SEPT1 -> Sep-1, MARCH1 -> Mar-1, DEC1 -> Dec-1
228
+ - Sample IDs: 1-3 -> Jan-3, 2/4 -> Feb-4
229
+
230
+ **Prevention**: Open CSV in a text editor first to verify. Import with explicit dtypes
231
+ in pandas: `pd.read_csv(path, dtype={'gene': str})`.
232
+
233
+ **Detection**: Look for datetime values in columns that should contain gene names or
234
+ sample identifiers.
235
+
236
+ ### Numeric Precision
237
+
238
+ - Floating point: 0.1 + 0.2 != 0.3. Use `np.isclose()` for comparisons.
239
+ - Rounding: Be consistent. Define rounding rules before analysis.
240
+ - Integer overflow: Rare in Python, but watch for 32-bit integer limits in R or
241
+ database imports (max 2,147,483,647).
242
+
243
+ ---
244
+
245
+ ## 7. Recommended Workflow
246
+
247
+ The recommended end-to-end data cleaning workflow:
248
+
249
+ 1. **Profile**: Run the profiling script. Understand what you have.
250
+ 2. **Flag**: Identify potential issues. Categorize by type and severity.
251
+ 3. **Review**: Present flags to the domain expert (you, the researcher).
252
+ 4. **Approve**: Decide which flags to act on. Document rationale for each decision.
253
+ 5. **Clean**: Generate and run cleaning code for approved actions only.
254
+ 6. **Verify**: Compare before/after summaries. Check that cleaning did not introduce
255
+ new problems.
256
+ 7. **Document**: Save the cleaning log, mapping tables, and decision rationale.
257
+ Include in supplementary materials or methods section.
258
+
259
+ ### Documentation Checklist
260
+
261
+ - [ ] Number of rows before and after cleaning
262
+ - [ ] Number and percentage of missing values per variable (before/after)
263
+ - [ ] Outlier handling decisions with justification
264
+ - [ ] Duplicate removal count
265
+ - [ ] Category mapping tables
266
+ - [ ] Imputation method and variables imputed
267
+ - [ ] Any variables excluded from analysis and why
268
+
269
+ ---
270
+
271
+ ## 8. Key References
272
+
273
+ 1. Van den Broeck J, Cunningham SA, Eeckels R, Herbst K. Data cleaning: detecting,
274
+ diagnosing, and editing data abnormalities. *PLoS Med*. 2005;2(10):e267.
275
+ DOI: 10.1371/journal.pmed.0020267
276
+
277
+ 2. Kang H. The prevention and handling of the missing data.
278
+ *Korean J Anesthesiol*. 2013;64(5):402-406.
279
+ DOI: 10.4097/kjae.2013.64.5.402
280
+
281
+ 3. Sterne JAC, White IR, Carlin JB, et al. Multiple imputation for missing data
282
+ in epidemiological and clinical research: potential and pitfalls.
283
+ *BMJ*. 2009;338:b2393.
284
+ DOI: 10.1136/bmj.b2393
285
+
286
+ 4. Altman DG, Bland JM. Missing data. *BMJ*. 2007;334(7590):424.
287
+ DOI: 10.1136/bmj.38977.682025.2C
288
+
289
+ 5. White IR, Royston P, Wood AM. Multiple imputation using chained equations:
290
+ Issues and guidance for practice. *Stat Med*. 2011;30(4):377-399.
291
+ DOI: 10.1002/sim.4067
292
+
293
+ 6. Ziemann M, Eren Y, El-Osta A. Gene name errors are widespread in the
294
+ scientific literature. *Genome Biol*. 2016;17(1):177.
295
+ DOI: 10.1186/s13059-016-1044-7
296
+
297
+ ---
298
+
299
+ *This reference is part of the clean-data skill for the medical-research-skills package.*
@@ -0,0 +1,304 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Data Profiling Template for Clinical Research Datasets
4
+ ======================================================
5
+
6
+ Generates a structured profile of a CSV or Excel dataset.
7
+ Outputs a summary table to the console and saves it as CSV.
8
+
9
+ Usage:
10
+ python profiling_template.py <file_path> [--output <output_dir>]
11
+
12
+ Requirements:
13
+ - pandas
14
+ - numpy
15
+ - matplotlib, seaborn (optional, for plots)
16
+
17
+ This script does NOT modify the input data. It is read-only.
18
+ """
19
+
20
+ import argparse
21
+ import os
22
+ import sys
23
+ import random
24
+ from pathlib import Path
25
+
26
+ import numpy as np
27
+ import pandas as pd
28
+
29
+ # Reproducibility
30
+ np.random.seed(42)
31
+ random.seed(42)
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # 1. Data Loading
36
+ # ---------------------------------------------------------------------------
37
+
38
def load_data(file_path: str) -> pd.DataFrame:
    """Load a CSV, TSV, or Excel file into a DataFrame.

    The reader is chosen from the file extension (compared case-insensitively).

    Args:
        file_path: Path to a ``.csv``, ``.tsv``, ``.xls``, ``.xlsx``, or
            ``.xlsm`` file.

    Returns:
        The loaded DataFrame.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    path = Path(file_path)
    ext = path.suffix.lower()

    if ext in (".csv", ".tsv"):
        sep = "\t" if ext == ".tsv" else ","
        df = pd.read_csv(path, sep=sep, low_memory=False)
    elif ext in (".xlsx", ".xlsm"):
        df = pd.read_excel(path, engine="openpyxl")
    elif ext == ".xls":
        # openpyxl only reads the newer zip-based formats. Leave the engine
        # unset so pandas picks a reader that supports legacy .xls files.
        df = pd.read_excel(path)
    else:
        raise ValueError(f"Unsupported file format: {ext}. Use CSV, TSV, or Excel.")

    print(f"Loaded {len(df)} rows x {len(df.columns)} columns from {path.name}")
    return df
53
+
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # 2. Variable Summary
57
+ # ---------------------------------------------------------------------------
58
+
59
def build_variable_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Summarize every column of *df* as one row of a profile table.

    Each row holds the column name, its pandas dtype, the heuristic type from
    ``_infer_variable_type``, the total row count, missing count and
    percentage, and the number of distinct non-null values. Columns inferred
    as numeric also get min, max, mean, median, and sd (rounded to 4 decimals);
    for all other columns those fields stay ``None``.

    Args:
        df: Dataset to profile. It is only read, never modified.

    Returns:
        A DataFrame with one row per column of *df*.
    """
    total_rows = len(df)
    rows = []

    for name in df.columns:
        column = df[name]
        missing = int(column.isna().sum())
        if total_rows > 0:
            missing_pct = round(100 * missing / total_rows, 2)
        else:
            missing_pct = 0.0
        distinct = int(column.nunique(dropna=True))
        kind = _infer_variable_type(column)

        row = {
            "variable": name,
            "dtype": str(column.dtype),
            "inferred_type": kind,
            "n_total": total_rows,
            "n_missing": missing,
            "pct_missing": missing_pct,
            "n_unique": distinct,
            "min": None,
            "max": None,
            "mean": None,
            "median": None,
            "sd": None,
        }

        if kind == "numeric":
            values = pd.to_numeric(column, errors="coerce")
            # Statistics are only filled in when at least one value is usable.
            if values.notna().any():
                reducers = {
                    "min": values.min,
                    "max": values.max,
                    "mean": values.mean,
                    "median": values.median,
                    "sd": values.std,
                }
                for field, reducer in reducers.items():
                    row[field] = round(float(reducer()), 4)

        rows.append(row)

    return pd.DataFrame(rows)
98
+
99
+
100
+ def _infer_variable_type(series: pd.Series) -> str:
101
+ """Heuristic type inference: numeric, categorical, datetime, or text."""
102
+ if pd.api.types.is_numeric_dtype(series):
103
+ return "numeric"
104
+ if pd.api.types.is_datetime64_any_dtype(series):
105
+ return "datetime"
106
+
107
+ # Try to parse as numeric (catches numeric-stored-as-string)
108
+ coerced = pd.to_numeric(series.dropna(), errors="coerce")
109
+ if coerced.notna().sum() > 0.8 * series.dropna().shape[0]:
110
+ return "numeric"
111
+
112
+ # Try to parse as datetime
113
+ try:
114
+ parsed = pd.to_datetime(series.dropna(), infer_datetime_format=True, errors="coerce")
115
+ if parsed.notna().sum() > 0.8 * series.dropna().shape[0]:
116
+ return "datetime"
117
+ except Exception:
118
+ pass
119
+
120
+ # Categorical vs free text heuristic
121
+ n_unique = series.nunique(dropna=True)
122
+ n_rows = len(series.dropna())
123
+ if n_rows > 0 and n_unique / n_rows < 0.05:
124
+ return "categorical"
125
+ if n_unique <= 20:
126
+ return "categorical"
127
+
128
+ return "text"
129
+
130
+
131
+ # ---------------------------------------------------------------------------
132
+ # 3. Flag Detection
133
+ # ---------------------------------------------------------------------------
134
+
135
def flag_missing(summary: pd.DataFrame, threshold: float = 5.0) -> pd.DataFrame:
    """Report variables whose missing percentage exceeds *threshold*.

    Severity is "High" above 30% missing, "Medium" above 10%, otherwise "Low".

    Args:
        summary: Per-variable profile with "variable", "n_missing", and
            "pct_missing" columns.
        threshold: Missing percentage a variable must exceed to be reported.

    Returns:
        A DataFrame with columns "variable", "issue", "n_missing",
        "pct_missing", and "severity". *summary* itself is not modified.
    """

    def _severity(pct):
        if pct > 30:
            return "High"
        if pct > 10:
            return "Medium"
        return "Low"

    exceeds = summary["pct_missing"] > threshold
    report = summary[exceeds].copy()
    report["issue"] = f"Missing > {threshold}%"
    report["severity"] = report["pct_missing"].apply(_severity)

    wanted = ["variable", "issue", "n_missing", "pct_missing", "severity"]
    return report[wanted]
143
+
144
+
145
def flag_outliers_iqr(df: pd.DataFrame, summary: pd.DataFrame) -> pd.DataFrame:
    """Report numeric variables that have values outside the 1.5 * IQR fences.

    Only variables whose "inferred_type" in *summary* is "numeric" are
    examined. A variable is skipped when it has fewer than 10 usable values or
    when its interquartile range is zero.

    Args:
        df: The dataset being profiled.
        summary: Per-variable profile with "variable" and "inferred_type"
            columns.

    Returns:
        A DataFrame with columns "variable", "issue", "count", and "severity",
        one row per variable with at least one outlier (empty if none).
    """
    is_numeric = summary["inferred_type"] == "numeric"
    findings = []

    for name in summary[is_numeric]["variable"].tolist():
        values = pd.to_numeric(df[name], errors="coerce").dropna()
        if len(values) < 10:
            continue

        first_quartile = values.quantile(0.25)
        third_quartile = values.quantile(0.75)
        spread = third_quartile - first_quartile
        if spread == 0:
            continue

        lower = first_quartile - 1.5 * spread
        upper = third_quartile + 1.5 * spread
        n_outside = len(values[(values < lower) | (values > upper)])
        if n_outside == 0:
            continue

        findings.append({
            "variable": name,
            "issue": f"Outlier (IQR): {n_outside} values outside [{lower:.2f}, {upper:.2f}]",
            "count": n_outside,
            "severity": "Medium",
        })

    if not findings:
        return pd.DataFrame(columns=["variable", "issue", "count", "severity"])
    return pd.DataFrame(findings)
173
+
174
+
175
+ # ---------------------------------------------------------------------------
176
+ # 4. Distribution Plots (optional)
177
+ # ---------------------------------------------------------------------------
178
+
179
def _safe_plot_stem(col) -> str:
    """Return *col* as text with characters that are unsafe in file names replaced by "_"."""
    unsafe = '/\\:*?"<>|'
    return "".join("_" if ch in unsafe or ord(ch) < 32 else ch for ch in str(col))


def plot_distributions(df: pd.DataFrame, summary: pd.DataFrame, output_dir: str):
    """Save histograms for numeric variables and bar charts for categorical ones.

    Plots are written as PNG files to ``<output_dir>/profile_plots``. At most
    the first 20 variables of each kind are plotted, and bar charts show the 15
    most frequent categories. If matplotlib is not installed, a notice is
    printed and nothing is written.

    Args:
        df: The dataset being profiled.
        summary: Per-variable profile with "variable" and "inferred_type"
            columns.
        output_dir: Directory under which ``profile_plots`` is created.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")  # render to files only; no display required
        import matplotlib.pyplot as plt
    except ImportError:
        print("[INFO] matplotlib not installed. Skipping distribution plots.")
        return

    plot_dir = os.path.join(output_dir, "profile_plots")
    os.makedirs(plot_dir, exist_ok=True)

    # Numeric histograms
    numeric_vars = summary[summary["inferred_type"] == "numeric"]["variable"].tolist()
    for col in numeric_vars[:20]:  # Limit to first 20 to avoid excessive plots
        numeric = pd.to_numeric(df[col], errors="coerce").dropna()
        if len(numeric) == 0:
            continue
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.hist(numeric, bins=30, edgecolor="black", alpha=0.7)
        ax.set_title(f"Distribution: {col}")
        ax.set_xlabel(col)
        ax.set_ylabel("Frequency")
        fig.tight_layout()
        # Column names such as "Glucose (mg/dL)" contain path separators, so
        # they are sanitized before being used as a file name.
        fig.savefig(os.path.join(plot_dir, f"hist_{_safe_plot_stem(col)}.png"), dpi=100)
        plt.close(fig)

    # Categorical bar charts
    cat_vars = summary[summary["inferred_type"] == "categorical"]["variable"].tolist()
    for col in cat_vars[:20]:
        counts = df[col].value_counts().head(15)
        if counts.empty:
            # Nothing to draw (e.g. an all-missing column); mirrors the
            # empty-data guard used for the histograms.
            continue
        fig, ax = plt.subplots(figsize=(6, 4))
        counts.plot(kind="barh", ax=ax, color="steelblue", edgecolor="black")
        ax.set_title(f"Categories: {col}")
        ax.set_xlabel("Count")
        fig.tight_layout()
        fig.savefig(os.path.join(plot_dir, f"bar_{_safe_plot_stem(col)}.png"), dpi=100)
        plt.close(fig)

    print(f"[INFO] Distribution plots saved to {plot_dir}/")
222
+
223
+
224
+ # ---------------------------------------------------------------------------
225
+ # 5. Report Output
226
+ # ---------------------------------------------------------------------------
227
+
228
def print_summary(summary: pd.DataFrame):
    """Write the variable summary to stdout as a plain-text table.

    Only the display columns are shown (the "dtype" and "n_total" columns are
    left out), framed by 80-character rules and a "VARIABLE SUMMARY" heading.

    Args:
        summary: Per-variable profile containing at least the display columns.
    """
    rule = "=" * 80
    shown = [
        "variable", "inferred_type", "n_missing", "pct_missing",
        "n_unique", "min", "max", "mean", "median", "sd"
    ]

    for header_line in ("\n" + rule, "VARIABLE SUMMARY", rule):
        print(header_line)
    print(summary[shown].to_string(index=False))
    print(rule)
239
+
240
+
241
def save_outputs(summary: pd.DataFrame, flags_missing: pd.DataFrame,
                 flags_outlier: pd.DataFrame, output_dir: str):
    """Write the profiling tables to CSV files in *output_dir*.

    The directory is created if needed. ``variable_summary.csv`` is always
    written; ``flags_missing.csv`` and ``flags_outliers.csv`` are written only
    when the corresponding table has at least one row. Each saved path is
    echoed to stdout.

    Args:
        summary: Per-variable profile table.
        flags_missing: Missing-data flags.
        flags_outlier: Outlier flags.
        output_dir: Destination directory.
    """
    os.makedirs(output_dir, exist_ok=True)

    # (table, file name, message prefix, write even when the table is empty)
    outputs = (
        (summary, "variable_summary.csv", "[SAVED] Variable summary -> ", True),
        (flags_missing, "flags_missing.csv", "[SAVED] Missing flags -> ", False),
        (flags_outlier, "flags_outliers.csv", "[SAVED] Outlier flags -> ", False),
    )

    for table, file_name, prefix, write_if_empty in outputs:
        if not (write_if_empty or len(table) > 0):
            continue
        destination = os.path.join(output_dir, file_name)
        table.to_csv(destination, index=False)
        print(f"{prefix}{destination}")
259
+
260
+
261
+ # ---------------------------------------------------------------------------
262
+ # Main
263
+ # ---------------------------------------------------------------------------
264
+
265
def main():
    """Command-line entry point: load a dataset, profile it, and save the results.

    Parses the command line, prints the variable summary and any missing-data
    or outlier flags, writes the CSV outputs, and generates distribution plots
    unless ``--no-plots`` is given.
    """
    cli = argparse.ArgumentParser(description="Profile a clinical research dataset.")
    cli.add_argument("file_path", help="Path to CSV or Excel file")
    cli.add_argument(
        "--output",
        default="./data_profile",
        help="Output directory for profiling results (default: ./data_profile)",
    )
    cli.add_argument(
        "--no-plots",
        action="store_true",
        help="Skip distribution plots",
    )
    options = cli.parse_args()

    # Load and profile
    frame = load_data(options.file_path)
    profile = build_variable_summary(frame)
    print_summary(profile)

    # Flag
    missing_flags = flag_missing(profile, threshold=5.0)
    outlier_flags = flag_outliers_iqr(frame, profile)

    flag_reports = (
        ("\n[FLAGS] Variables with >5% missing:", missing_flags),
        ("\n[FLAGS] Variables with IQR outliers:", outlier_flags),
    )
    for heading, flags in flag_reports:
        if len(flags) > 0:
            print(heading)
            print(flags.to_string(index=False))

    # Save, then plot unless disabled
    save_outputs(profile, missing_flags, outlier_flags, options.output)
    if not options.no_plots:
        plot_distributions(frame, profile, options.output)

    print("\n[DONE] Profiling complete. Review outputs before proceeding to cleaning.")
301
+
302
+
303
+ if __name__ == "__main__":
304
+ main()