medsci-skills 4.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (702)
  1. package/LICENSE +50 -0
  2. package/README.md +602 -0
  3. package/README_FIRST.md +27 -0
  4. package/bin/medsci-skills.js +159 -0
  5. package/installers/install-macos.command +19 -0
  6. package/installers/install-windows.cmd +26 -0
  7. package/installers/install-windows.ps1 +17 -0
  8. package/installers/install.py +218 -0
  9. package/metadata/skills_catalog.json +452 -0
  10. package/package.json +48 -0
  11. package/skills/academic-aio/SKILL.md +408 -0
  12. package/skills/academic-aio/references/case_studies/kjr_mllm_2025.md +82 -0
  13. package/skills/academic-aio/references/checklists/AIO_GENERAL.md +354 -0
  14. package/skills/academic-aio/references/journal_summarybox_templates.yaml +126 -0
  15. package/skills/academic-aio/references/oac_funding_checklist.yaml +129 -0
  16. package/skills/academic-aio/references/reporting_guideline_mapping.md +39 -0
  17. package/skills/academic-aio/references/schema_markup_templates/CodeRepository.jsonld +32 -0
  18. package/skills/academic-aio/references/schema_markup_templates/Dataset.jsonld +36 -0
  19. package/skills/academic-aio/references/schema_markup_templates/Person.jsonld +30 -0
  20. package/skills/academic-aio/references/schema_markup_templates/README.md +43 -0
  21. package/skills/academic-aio/references/schema_markup_templates/ScholarlyArticle.jsonld +55 -0
  22. package/skills/academic-aio/scripts/batch_metadata_audit.py +169 -0
  23. package/skills/academic-aio/scripts/validate_schema.py +118 -0
  24. package/skills/academic-aio/skill.yml +36 -0
  25. package/skills/academic-aio/templates/aio_audit_checklist.md.j2 +108 -0
  26. package/skills/add-journal/SKILL.md +482 -0
  27. package/skills/add-journal/skill.yml +33 -0
  28. package/skills/analyze-stats/SKILL.md +598 -0
  29. package/skills/analyze-stats/references/analysis_guides/missing_data.md +109 -0
  30. package/skills/analyze-stats/references/analysis_guides/nhis_icd10_mapping.md +247 -0
  31. package/skills/analyze-stats/references/analysis_guides/propensity_score.md +132 -0
  32. package/skills/analyze-stats/references/analysis_guides/regression.md +115 -0
  33. package/skills/analyze-stats/references/analysis_guides/repeated_measures.md +160 -0
  34. package/skills/analyze-stats/references/analysis_guides/survey_weighted.md +366 -0
  35. package/skills/analyze-stats/references/analysis_guides/test_selection.md +86 -0
  36. package/skills/analyze-stats/references/style/figure_style.mplstyle +69 -0
  37. package/skills/analyze-stats/references/style/theme_publication.R +147 -0
  38. package/skills/analyze-stats/references/table-standards/journal-profiles/ajr.yaml +51 -0
  39. package/skills/analyze-stats/references/table-standards/journal-profiles/european_radiology.yaml +55 -0
  40. package/skills/analyze-stats/references/table-standards/journal-profiles/jama.yaml +66 -0
  41. package/skills/analyze-stats/references/table-standards/journal-profiles/lancet.yaml +57 -0
  42. package/skills/analyze-stats/references/table-standards/journal-profiles/nejm.yaml +51 -0
  43. package/skills/analyze-stats/references/table-standards/journal-profiles/radiology.yaml +66 -0
  44. package/skills/analyze-stats/references/table-standards/table-standards.md +287 -0
  45. package/skills/analyze-stats/references/table-standards/table-types/diagnostic_accuracy.md +36 -0
  46. package/skills/analyze-stats/references/table-standards/table-types/meta_analysis.md +58 -0
  47. package/skills/analyze-stats/references/table-standards/table-types/model_comparison.md +36 -0
  48. package/skills/analyze-stats/references/table-standards/table-types/regression_results.md +50 -0
  49. package/skills/analyze-stats/references/table-standards/table-types/table1_demographics.md +51 -0
  50. package/skills/analyze-stats/references/table-standards/tool-comparison.md +79 -0
  51. package/skills/analyze-stats/references/templates/agreement_analysis.py +436 -0
  52. package/skills/analyze-stats/references/templates/dca_plot.R +237 -0
  53. package/skills/analyze-stats/references/templates/diagnostic_accuracy.py +401 -0
  54. package/skills/analyze-stats/references/templates/dta_meta_analysis.R +384 -0
  55. package/skills/analyze-stats/references/templates/forest_plot.py +412 -0
  56. package/skills/analyze-stats/references/templates/likert_summary.py +356 -0
  57. package/skills/analyze-stats/references/templates/meta_analysis.R +365 -0
  58. package/skills/analyze-stats/references/templates/propensity_score.py +478 -0
  59. package/skills/analyze-stats/references/templates/regression.py +425 -0
  60. package/skills/analyze-stats/references/templates/repeated_measures.py +434 -0
  61. package/skills/analyze-stats/references/templates/sample_size.R +382 -0
  62. package/skills/analyze-stats/references/templates/survey_weighted_analysis.py +411 -0
  63. package/skills/analyze-stats/references/templates/survival_analysis.py +325 -0
  64. package/skills/analyze-stats/references/templates/table1_demographics.py +287 -0
  65. package/skills/analyze-stats/scripts/check_generated_code.py +335 -0
  66. package/skills/analyze-stats/skill.yml +38 -0
  67. package/skills/analyze-stats/tests/fixtures/gen_bad.R +16 -0
  68. package/skills/analyze-stats/tests/fixtures/gen_bad.py +24 -0
  69. package/skills/analyze-stats/tests/fixtures/gen_clean.py +21 -0
  70. package/skills/analyze-stats/tests/test_generated_code.sh +59 -0
  71. package/skills/analyze-stats/tests/test_survival_template.sh +53 -0
  72. package/skills/author-strategy/SKILL.md +117 -0
  73. package/skills/author-strategy/analyze_patterns.py +303 -0
  74. package/skills/author-strategy/fetch_pubmed.py +374 -0
  75. package/skills/author-strategy/skill.yml +34 -0
  76. package/skills/batch-cohort/SKILL.md +223 -0
  77. package/skills/batch-cohort/references/base_template_knhanes.R +210 -0
  78. package/skills/batch-cohort/references/batch_template_generator.R +222 -0
  79. package/skills/batch-cohort/references/variable_coding_registry.md +136 -0
  80. package/skills/batch-cohort/skill.yml +35 -0
  81. package/skills/calc-sample-size/SKILL.md +491 -0
  82. package/skills/calc-sample-size/references/formulas.md +655 -0
  83. package/skills/calc-sample-size/references/observational_cohort.md +49 -0
  84. package/skills/calc-sample-size/skill.yml +51 -0
  85. package/skills/check-reporting/SKILL.md +534 -0
  86. package/skills/check-reporting/references/LICENSES.md +41 -0
  87. package/skills/check-reporting/references/checklists/AMSTAR2.md +54 -0
  88. package/skills/check-reporting/references/checklists/ARRIVE_2.md +234 -0
  89. package/skills/check-reporting/references/checklists/CARE.md +102 -0
  90. package/skills/check-reporting/references/checklists/CLAIM_2024.md +128 -0
  91. package/skills/check-reporting/references/checklists/CLEAR.md +113 -0
  92. package/skills/check-reporting/references/checklists/CONSORT.md +86 -0
  93. package/skills/check-reporting/references/checklists/COSMIN_RoB.md +136 -0
  94. package/skills/check-reporting/references/checklists/GRRAS.md +61 -0
  95. package/skills/check-reporting/references/checklists/MI_CLEAR_LLM.md +167 -0
  96. package/skills/check-reporting/references/checklists/MOOSE.md +85 -0
  97. package/skills/check-reporting/references/checklists/NOS.md +88 -0
  98. package/skills/check-reporting/references/checklists/PRISMA_2020.md +135 -0
  99. package/skills/check-reporting/references/checklists/PRISMA_DTA.md +36 -0
  100. package/skills/check-reporting/references/checklists/PRISMA_P.md +56 -0
  101. package/skills/check-reporting/references/checklists/PROBAST.md +75 -0
  102. package/skills/check-reporting/references/checklists/PROBAST_AI.md +130 -0
  103. package/skills/check-reporting/references/checklists/QUADAS2.md +77 -0
  104. package/skills/check-reporting/references/checklists/QUADAS_C.md +131 -0
  105. package/skills/check-reporting/references/checklists/ROBINS_E.md +179 -0
  106. package/skills/check-reporting/references/checklists/ROBINS_I.md +87 -0
  107. package/skills/check-reporting/references/checklists/ROBIS.md +114 -0
  108. package/skills/check-reporting/references/checklists/ROB_ME.md +126 -0
  109. package/skills/check-reporting/references/checklists/RoB2.md +79 -0
  110. package/skills/check-reporting/references/checklists/RoB_NMA.md +96 -0
  111. package/skills/check-reporting/references/checklists/SPIRIT.md +112 -0
  112. package/skills/check-reporting/references/checklists/SQUIRE_2.md +68 -0
  113. package/skills/check-reporting/references/checklists/STARD.md +129 -0
  114. package/skills/check-reporting/references/checklists/STARD_AI.md +211 -0
  115. package/skills/check-reporting/references/checklists/STROBE.md +80 -0
  116. package/skills/check-reporting/references/checklists/SWiM.md +33 -0
  117. package/skills/check-reporting/references/checklists/TRIPOD.md +157 -0
  118. package/skills/check-reporting/references/checklists/TRIPOD_AI.md +140 -0
  119. package/skills/check-reporting/references/step4c_registration_timing.md +93 -0
  120. package/skills/check-reporting/references/step4d_prisma_figure_audit.md +137 -0
  121. package/skills/check-reporting/scripts/check_checklist_exists.py +183 -0
  122. package/skills/check-reporting/scripts/check_checklist_version.py +168 -0
  123. package/skills/check-reporting/scripts/check_framework_naming.py +206 -0
  124. package/skills/check-reporting/scripts/check_prisma_figure.py +209 -0
  125. package/skills/check-reporting/scripts/prisma_cascade_check.py +274 -0
  126. package/skills/check-reporting/skill.yml +41 -0
  127. package/skills/check-reporting/tests/fixtures/framework_bad.md +8 -0
  128. package/skills/check-reporting/tests/fixtures/framework_clean.md +7 -0
  129. package/skills/check-reporting/tests/test_checklist_fail_fast.sh +77 -0
  130. package/skills/check-reporting/tests/test_checklist_version.sh +72 -0
  131. package/skills/check-reporting/tests/test_framework_naming.sh +45 -0
  132. package/skills/check-reporting/tests/test_prisma_cascade.sh +104 -0
  133. package/skills/clean-data/SKILL.md +180 -0
  134. package/skills/clean-data/references/cleaning_patterns.md +299 -0
  135. package/skills/clean-data/references/profiling_template.py +304 -0
  136. package/skills/clean-data/scripts/check_structural_zero.py +174 -0
  137. package/skills/clean-data/skill.yml +35 -0
  138. package/skills/clean-data/tests/fixtures/smoking.csv +8 -0
  139. package/skills/clean-data/tests/test_structural_zero.sh +49 -0
  140. package/skills/cross-national/SKILL.md +264 -0
  141. package/skills/cross-national/skill.yml +37 -0
  142. package/skills/define-variables/SKILL.md +146 -0
  143. package/skills/define-variables/references/common_definitions.md +190 -0
  144. package/skills/define-variables/skill.yml +34 -0
  145. package/skills/define-variables/templates/variable_operationalization.md +64 -0
  146. package/skills/deidentify/SKILL.md +203 -0
  147. package/skills/deidentify/deidentify.py +1224 -0
  148. package/skills/deidentify/locales/_template.json +45 -0
  149. package/skills/deidentify/locales/au.json +43 -0
  150. package/skills/deidentify/locales/ca.json +44 -0
  151. package/skills/deidentify/locales/cn.json +47 -0
  152. package/skills/deidentify/locales/de.json +48 -0
  153. package/skills/deidentify/locales/fr.json +48 -0
  154. package/skills/deidentify/locales/in.json +48 -0
  155. package/skills/deidentify/locales/jp.json +48 -0
  156. package/skills/deidentify/locales/kr.json +48 -0
  157. package/skills/deidentify/locales/uk.json +45 -0
  158. package/skills/deidentify/locales/us.json +43 -0
  159. package/skills/deidentify/references/date_shift_guide.md +82 -0
  160. package/skills/deidentify/references/hipaa_18_identifiers.md +48 -0
  161. package/skills/deidentify/references/korean_phi_patterns.md +135 -0
  162. package/skills/deidentify/skill.yml +43 -0
  163. package/skills/deidentify/tests/README.md +26 -0
  164. package/skills/deidentify/tests/test_clean.csv +16 -0
  165. package/skills/deidentify/tests/test_edge_cases.csv +11 -0
  166. package/skills/deidentify/tests/test_phi_korean.csv +11 -0
  167. package/skills/design-ai-benchmarking/SKILL.md +214 -0
  168. package/skills/design-ai-benchmarking/references/benchmark_export_schema.json +69 -0
  169. package/skills/design-ai-benchmarking/references/elicitation_rubric_template.md +37 -0
  170. package/skills/design-ai-benchmarking/skill.yml +38 -0
  171. package/skills/design-study/SKILL.md +298 -0
  172. package/skills/design-study/skill.yml +33 -0
  173. package/skills/fill-icmje-coi/SKILL.md +216 -0
  174. package/skills/fill-icmje-coi/scripts/fill_icmje_coi.py +140 -0
  175. package/skills/fill-icmje-coi/skill.yml +35 -0
  176. package/skills/fill-icmje-coi/templates/icmje_coi_seed_synthetic.docx +0 -0
  177. package/skills/fill-protocol/SKILL.md +248 -0
  178. package/skills/fill-protocol/examples/example_irb_template.yaml +53 -0
  179. package/skills/fill-protocol/references/best_practices.md +121 -0
  180. package/skills/fill-protocol/scripts/doc_to_docx.py +111 -0
  181. package/skills/fill-protocol/scripts/fill_form.py +611 -0
  182. package/skills/fill-protocol/scripts/inspect_template.py +61 -0
  183. package/skills/fill-protocol/setup.sh +162 -0
  184. package/skills/fill-protocol/skill.yml +37 -0
  185. package/skills/find-cohort-gap/SKILL.md +309 -0
  186. package/skills/find-cohort-gap/references/cohort_profile_template.md +93 -0
  187. package/skills/find-cohort-gap/references/onepager_template.md +84 -0
  188. package/skills/find-cohort-gap/references/pattern_scoring_rubric.md +169 -0
  189. package/skills/find-cohort-gap/references/saturation_query_templates.md +143 -0
  190. package/skills/find-cohort-gap/skill.yml +35 -0
  191. package/skills/find-journal/POLICY.md +87 -0
  192. package/skills/find-journal/SKILL.md +340 -0
  193. package/skills/find-journal/references/journal_profiles/AJNR.md +29 -0
  194. package/skills/find-journal/references/journal_profiles/AJR.md +30 -0
  195. package/skills/find-journal/references/journal_profiles/Abdominal_Radiology.md +30 -0
  196. package/skills/find-journal/references/journal_profiles/Academic_Radiology.md +30 -0
  197. package/skills/find-journal/references/journal_profiles/Annals_of_Internal_Medicine.md +33 -0
  198. package/skills/find-journal/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +28 -0
  199. package/skills/find-journal/references/journal_profiles/BMC_Medicine.md +31 -0
  200. package/skills/find-journal/references/journal_profiles/British_Journal_of_Radiology.md +39 -0
  201. package/skills/find-journal/references/journal_profiles/CVIR.md +30 -0
  202. package/skills/find-journal/references/journal_profiles/Chest.md +39 -0
  203. package/skills/find-journal/references/journal_profiles/Clinical_Radiology.md +30 -0
  204. package/skills/find-journal/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +32 -0
  205. package/skills/find-journal/references/journal_profiles/Diabetes_Metabolism_Journal.md +36 -0
  206. package/skills/find-journal/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +32 -0
  207. package/skills/find-journal/references/journal_profiles/Endocrinology_and_Metabolism.md +37 -0
  208. package/skills/find-journal/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +39 -0
  209. package/skills/find-journal/references/journal_profiles/European_Radiology.md +29 -0
  210. package/skills/find-journal/references/journal_profiles/Hepatology_Communications.md +40 -0
  211. package/skills/find-journal/references/journal_profiles/Hepatology_International.md +37 -0
  212. package/skills/find-journal/references/journal_profiles/IEEE_JBHI.md +28 -0
  213. package/skills/find-journal/references/journal_profiles/IEEE_TMI.md +28 -0
  214. package/skills/find-journal/references/journal_profiles/INSI.md +29 -0
  215. package/skills/find-journal/references/journal_profiles/Investigative_Radiology.md +25 -0
  216. package/skills/find-journal/references/journal_profiles/JACC_Advances.md +41 -0
  217. package/skills/find-journal/references/journal_profiles/JACC_Asia.md +30 -0
  218. package/skills/find-journal/references/journal_profiles/JACR.md +28 -0
  219. package/skills/find-journal/references/journal_profiles/JAMA.md +40 -0
  220. package/skills/find-journal/references/journal_profiles/JAMA_Network_Open.md +30 -0
  221. package/skills/find-journal/references/journal_profiles/JCSM.md +39 -0
  222. package/skills/find-journal/references/journal_profiles/JKMS.md +32 -0
  223. package/skills/find-journal/references/journal_profiles/JMIR.md +29 -0
  224. package/skills/find-journal/references/journal_profiles/JMIR_Medical_Education.md +29 -0
  225. package/skills/find-journal/references/journal_profiles/JNIS.md +35 -0
  226. package/skills/find-journal/references/journal_profiles/JVIR.md +31 -0
  227. package/skills/find-journal/references/journal_profiles/Journal_of_Biomedical_Informatics.md +29 -0
  228. package/skills/find-journal/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +40 -0
  229. package/skills/find-journal/references/journal_profiles/Journal_of_Magnetic_Resonance_Imaging.md +30 -0
  230. package/skills/find-journal/references/journal_profiles/Journal_of_Nuclear_Medicine.md +31 -0
  231. package/skills/find-journal/references/journal_profiles/Journal_of_Stroke.md +32 -0
  232. package/skills/find-journal/references/journal_profiles/KJR.md +38 -0
  233. package/skills/find-journal/references/journal_profiles/Korean_Circulation_Journal.md +38 -0
  234. package/skills/find-journal/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +36 -0
  235. package/skills/find-journal/references/journal_profiles/Lancet_Diabetes_and_Endocrinology.md +40 -0
  236. package/skills/find-journal/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +49 -0
  237. package/skills/find-journal/references/journal_profiles/Lancet_Infectious_Diseases.md +38 -0
  238. package/skills/find-journal/references/journal_profiles/Lancet_Neurology.md +39 -0
  239. package/skills/find-journal/references/journal_profiles/Lancet_Oncology.md +40 -0
  240. package/skills/find-journal/references/journal_profiles/Lancet_Psychiatry.md +38 -0
  241. package/skills/find-journal/references/journal_profiles/Lancet_Public_Health.md +30 -0
  242. package/skills/find-journal/references/journal_profiles/Lancet_Respiratory_Medicine.md +39 -0
  243. package/skills/find-journal/references/journal_profiles/Liver_International.md +33 -0
  244. package/skills/find-journal/references/journal_profiles/Medical_Image_Analysis.md +28 -0
  245. package/skills/find-journal/references/journal_profiles/NEJM.md +33 -0
  246. package/skills/find-journal/references/journal_profiles/Nature_Machine_Intelligence.md +31 -0
  247. package/skills/find-journal/references/journal_profiles/Nature_Medicine.md +39 -0
  248. package/skills/find-journal/references/journal_profiles/Neuroradiology.md +31 -0
  249. package/skills/find-journal/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +39 -0
  250. package/skills/find-journal/references/journal_profiles/PLOS_Medicine.md +32 -0
  251. package/skills/find-journal/references/journal_profiles/RYAI.md +28 -0
  252. package/skills/find-journal/references/journal_profiles/Radiology.md +29 -0
  253. package/skills/find-journal/references/journal_profiles/Skeletal_Radiology.md +31 -0
  254. package/skills/find-journal/references/journal_profiles/Stroke.md +37 -0
  255. package/skills/find-journal/references/journal_profiles/The_BMJ.md +31 -0
  256. package/skills/find-journal/references/journal_profiles/The_Lancet.md +31 -0
  257. package/skills/find-journal/references/journal_profiles/The_Lancet_Digital_Health.md +29 -0
  258. package/skills/find-journal/references/journal_profiles/World_Journal_of_Hepatology.md +53 -0
  259. package/skills/find-journal/references/journal_profiles/npj_Digital_Medicine.md +29 -0
  260. package/skills/find-journal/skill.yml +34 -0
  261. package/skills/fulltext-retrieval/SKILL.md +174 -0
  262. package/skills/fulltext-retrieval/fetch_oa.py +433 -0
  263. package/skills/fulltext-retrieval/pdf_to_md.py +160 -0
  264. package/skills/fulltext-retrieval/skill.yml +41 -0
  265. package/skills/generate-codebook/SKILL.md +155 -0
  266. package/skills/generate-codebook/references/codebook_schema.md +76 -0
  267. package/skills/generate-codebook/scripts/generate_codebook.py +278 -0
  268. package/skills/generate-codebook/skill.yml +35 -0
  269. package/skills/generate-codebook/tests/test_generate_codebook.sh +76 -0
  270. package/skills/grant-builder/SKILL.md +251 -0
  271. package/skills/grant-builder/skill.yml +34 -0
  272. package/skills/humanize/SKILL.md +251 -0
  273. package/skills/humanize/references/ai_patterns.md +571 -0
  274. package/skills/humanize/skill.yml +33 -0
  275. package/skills/intake-project/SKILL.md +264 -0
  276. package/skills/intake-project/skill.yml +34 -0
  277. package/skills/lit-sync/SKILL.md +448 -0
  278. package/skills/lit-sync/references/locale/ko/note_templates.md +110 -0
  279. package/skills/lit-sync/skill.yml +52 -0
  280. package/skills/lit-sync/tests/test_poll_logic.sh +92 -0
  281. package/skills/ma-scout/SKILL.md +640 -0
  282. package/skills/ma-scout/references/project_readme_template.md +95 -0
  283. package/skills/ma-scout/references/project_readme_template_ko.md +82 -0
  284. package/skills/ma-scout/skill.yml +33 -0
  285. package/skills/make-figures/SKILL.md +957 -0
  286. package/skills/make-figures/references/critic_rubrics/data_plot.md +166 -0
  287. package/skills/make-figures/references/critic_rubrics/flow_diagram.md +169 -0
  288. package/skills/make-figures/references/design_principles.md +181 -0
  289. package/skills/make-figures/references/exemplar_diagrams/README.md +65 -0
  290. package/skills/make-figures/references/exemplar_diagrams/consort/README.md +15 -0
  291. package/skills/make-figures/references/exemplar_diagrams/consort/template_input.yaml +37 -0
  292. package/skills/make-figures/references/exemplar_diagrams/consort/template_output.pdf +0 -0
  293. package/skills/make-figures/references/exemplar_diagrams/consort/template_output.png +0 -0
  294. package/skills/make-figures/references/exemplar_diagrams/consort/template_output_600.png +0 -0
  295. package/skills/make-figures/references/exemplar_diagrams/other/other_02.meta.yaml +4 -0
  296. package/skills/make-figures/references/exemplar_diagrams/other/other_02.png +0 -0
  297. package/skills/make-figures/references/exemplar_diagrams/other/other_02_why.md +13 -0
  298. package/skills/make-figures/references/exemplar_diagrams/pipeline/README.md +15 -0
  299. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.meta.yaml +4 -0
  300. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.png +0 -0
  301. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01_why.md +13 -0
  302. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.meta.yaml +4 -0
  303. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.png +0 -0
  304. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03_why.md +13 -0
  305. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.meta.yaml +4 -0
  306. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.png +0 -0
  307. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04_why.md +13 -0
  308. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.meta.yaml +4 -0
  309. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.png +0 -0
  310. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05_why.md +13 -0
  311. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.meta.yaml +4 -0
  312. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.png +0 -0
  313. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06_why.md +13 -0
  314. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.meta.yaml +4 -0
  315. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.png +0 -0
  316. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07_why.md +13 -0
  317. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.meta.yaml +4 -0
  318. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.png +0 -0
  319. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08_why.md +13 -0
  320. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.meta.yaml +4 -0
  321. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.png +0 -0
  322. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09_why.md +13 -0
  323. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.meta.yaml +4 -0
  324. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.png +0 -0
  325. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10_why.md +13 -0
  326. package/skills/make-figures/references/exemplar_diagrams/prisma/README.md +15 -0
  327. package/skills/make-figures/references/exemplar_diagrams/prisma/template_input.yaml +47 -0
  328. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.pdf +0 -0
  329. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.png +0 -0
  330. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output_600.png +0 -0
  331. package/skills/make-figures/references/exemplar_diagrams/stard/README.md +15 -0
  332. package/skills/make-figures/references/exemplar_diagrams/stard/template_input.yaml +40 -0
  333. package/skills/make-figures/references/exemplar_diagrams/stard/template_output.pdf +0 -0
  334. package/skills/make-figures/references/exemplar_diagrams/stard/template_output.png +0 -0
  335. package/skills/make-figures/references/exemplar_diagrams/stard/template_output_600.png +0 -0
  336. package/skills/make-figures/references/exemplar_diagrams/strobe/template_input.yaml +43 -0
  337. package/skills/make-figures/references/exemplar_diagrams/strobe/template_input_pptx.yaml +43 -0
  338. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pdf +0 -0
  339. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.png +0 -0
  340. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pptx +0 -0
  341. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output_600.png +0 -0
  342. package/skills/make-figures/references/figure_specs.md +291 -0
  343. package/skills/make-figures/references/flow_diagram_lessons.md +164 -0
  344. package/skills/make-figures/references/jacc_central_illustration_principles.md +91 -0
  345. package/skills/make-figures/references/medical_illustration_sources.md +98 -0
  346. package/skills/make-figures/references/pipeline_concepts_medical_ai.md +240 -0
  347. package/skills/make-figures/references/reporting_guideline_figure_map.md +104 -0
  348. package/skills/make-figures/references/visual_abstract_templates/european_radiology.pptx +0 -0
  349. package/skills/make-figures/references/visual_abstract_templates/jacc_central_illustration.pptx +0 -0
  350. package/skills/make-figures/references/visual_abstract_templates/medsci_default.pptx +0 -0
  351. package/skills/make-figures/references/visual_abstract_templates/template_guide.md +114 -0
  352. package/skills/make-figures/scripts/build_jacc_template.py +77 -0
  353. package/skills/make-figures/scripts/build_prisma2020_template.py +371 -0
  354. package/skills/make-figures/scripts/build_strobe_template.py +351 -0
  355. package/skills/make-figures/scripts/critic_figure.py +264 -0
  356. package/skills/make-figures/scripts/derive_figure_legend_counts.py +138 -0
  357. package/skills/make-figures/scripts/extract_exemplar_from_pdf.py +186 -0
  358. package/skills/make-figures/scripts/fetch_official_templates.sh +88 -0
  359. package/skills/make-figures/scripts/fill_prisma_template.py +142 -0
  360. package/skills/make-figures/scripts/generate_flow_diagram.R +133 -0
  361. package/skills/make-figures/scripts/generate_image.py +99 -0
  362. package/skills/make-figures/scripts/generate_visual_abstract.py +438 -0
  363. package/skills/make-figures/scripts/validate_pptx_mac_compat.py +233 -0
  364. package/skills/make-figures/skill.yml +52 -0
  365. package/skills/make-figures/templates/official/NOTES.md +62 -0
  366. package/skills/make-figures/templates/official/consort2010/CONSORT_2025_editable_checklist.docx +0 -0
  367. package/skills/make-figures/templates/official/consort2010/CONSORT_2025_flow_diagram.docx +0 -0
  368. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v1.pptx +0 -0
  369. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v2.pptx +0 -0
  370. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_updated_v2.pptx +0 -0
  371. package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_editable_checklist.docx +0 -0
  372. package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_participant_timeline.docx +0 -0
  373. package/skills/make-figures/templates/official/stard2015/STARD_2015_checklist.docx +0 -0
  374. package/skills/make-figures/templates/official/stard2015/STARD_2015_flow_diagram.pdf +0 -0
  375. package/skills/make-figures/tests/fixtures/figure1_flow.yaml +8 -0
  376. package/skills/make-figures/tests/fixtures/manuscript_ok.md +9 -0
  377. package/skills/make-figures/tests/fixtures/manuscript_stale.md +4 -0
  378. package/skills/make-figures/tests/test_legend_reconcile.sh +36 -0
  379. package/skills/manage-project/SKILL.md +358 -0
  380. package/skills/manage-project/references/pre_submission_checklist.md +53 -0
  381. package/skills/manage-project/references/project_state_template.json +37 -0
  382. package/skills/manage-project/references/scaffold_templates.md +118 -0
  383. package/skills/manage-project/references/status_output_format.md +44 -0
  384. package/skills/manage-project/references/timeline_example.md +20 -0
  385. package/skills/manage-project/skill.yml +36 -0
  386. package/skills/manage-project/templates/SSOT.yaml.template +41 -0
  387. package/skills/manage-refs/LICENSE.zotero-mcp +21 -0
  388. package/skills/manage-refs/NOTICE.md +29 -0
  389. package/skills/manage-refs/SKILL.md +289 -0
  390. package/skills/manage-refs/citation_styles/README.md +40 -0
  391. package/skills/manage-refs/citation_styles/american-journal-of-roentgenology.csl +211 -0
  392. package/skills/manage-refs/citation_styles/cardiovascular-and-interventional-radiology.csl +19 -0
  393. package/skills/manage-refs/citation_styles/european-radiology.csl +19 -0
  394. package/skills/manage-refs/citation_styles/journal-of-cachexia-sarcopenia-and-muscle.csl +150 -0
  395. package/skills/manage-refs/citation_styles/journal-of-korean-medical-science-strict.csl +533 -0
  396. package/skills/manage-refs/citation_styles/journal-of-korean-medical-science.csl +16 -0
  397. package/skills/manage-refs/citation_styles/korean-journal-of-radiology.csl +155 -0
  398. package/skills/manage-refs/citation_styles/nature.csl +189 -0
  399. package/skills/manage-refs/citation_styles/nlm-citation-sequence.csl +535 -0
  400. package/skills/manage-refs/citation_styles/radiology.csl +228 -0
  401. package/skills/manage-refs/citation_styles/springer-basic-brackets.csl +187 -0
  402. package/skills/manage-refs/citation_styles/springer-vancouver-brackets.csl +276 -0
  403. package/skills/manage-refs/citation_styles/vancouver-superscript.csl +536 -0
  404. package/skills/manage-refs/citation_styles/vancouver.csl +535 -0
  405. package/skills/manage-refs/references/REFERENCE_STYLE_SPECS.md +59 -0
  406. package/skills/manage-refs/references/check_xref_symptoms.md +35 -0
  407. package/skills/manage-refs/scripts/_vendor_citation_writer.py +600 -0
  408. package/skills/manage-refs/scripts/check_citation_keys.py +112 -0
  409. package/skills/manage-refs/scripts/check_csl_render.py +102 -0
  410. package/skills/manage-refs/scripts/check_xref.py +633 -0
  411. package/skills/manage-refs/scripts/fill_journal_abbrev.py +104 -0
  412. package/skills/manage-refs/scripts/inject_zotero_cwyw.py +133 -0
  413. package/skills/manage-refs/scripts/md_marker_convert.py +193 -0
  414. package/skills/manage-refs/scripts/pre_submission_gate.sh +238 -0
  415. package/skills/manage-refs/scripts/render_pandoc.sh +88 -0
  416. package/skills/manage-refs/skill.yml +70 -0
  417. package/skills/manage-refs/tests/fixtures/pre_submission_gate/README.md +32 -0
  418. package/skills/manage-refs/tests/fixtures/pre_submission_gate/manuscript.md +10 -0
  419. package/skills/manage-refs/tests/fixtures/pre_submission_gate/refs.bib +34 -0
  420. package/skills/manage-refs/tests/fixtures/pre_submission_gate/run.sh +117 -0
  421. package/skills/manage-refs/tests/test_vN_docx_check.sh +145 -0
  422. package/skills/meta-analysis/SKILL.md +739 -0
  423. package/skills/meta-analysis/references/LICENSES.md +21 -0
  424. package/skills/meta-analysis/references/PROSPERO_template.md +221 -0
  425. package/skills/meta-analysis/references/ai_pre_screening_template.py +245 -0
  426. package/skills/meta-analysis/references/checklists/JBI_Case_Series.md +45 -0
  427. package/skills/meta-analysis/references/checklists/NOS.md +88 -0
  428. package/skills/meta-analysis/references/checklists/PRISMA_DTA.md +36 -0
  429. package/skills/meta-analysis/references/checklists/PROBAST.md +75 -0
  430. package/skills/meta-analysis/references/checklists/QUADAS2.md +77 -0
  431. package/skills/meta-analysis/references/checklists/ROBINS_I.md +87 -0
  432. package/skills/meta-analysis/references/checklists/RoB2.md +79 -0
  433. package/skills/meta-analysis/references/data_integrity_checklist.md +57 -0
  434. package/skills/meta-analysis/references/icmje_coi_guide.md +181 -0
  435. package/skills/meta-analysis/references/phase10_recovery.md +136 -0
  436. package/skills/meta-analysis/references/phase4_km_composite.md +58 -0
  437. package/skills/meta-analysis/references/phase6_statistical_synthesis.md +148 -0
  438. package/skills/meta-analysis/references/phase9_circulation.md +84 -0
  439. package/skills/meta-analysis/references/post_submission_release_ops.md +41 -0
  440. package/skills/meta-analysis/references/r_templates.md +132 -0
  441. package/skills/meta-analysis/references/review_orchestration.md +40 -0
  442. package/skills/meta-analysis/references/submission_package_drift.md +71 -0
  443. package/skills/meta-analysis/scripts/check_pool_consistency.py +201 -0
  444. package/skills/meta-analysis/scripts/cohort_overlap_check.py +242 -0
  445. package/skills/meta-analysis/scripts/dta_extraction_qc.py +137 -0
  446. package/skills/meta-analysis/scripts/screening_reconcile.py +160 -0
  447. package/skills/meta-analysis/skill.yml +47 -0
  448. package/skills/meta-analysis/templates/FINAL_POOL_LOCK.yaml.template +70 -0
  449. package/skills/meta-analysis/templates/extraction_form_v2.md +129 -0
  450. package/skills/meta-analysis/templates/supplementary_8file_checklist.md +94 -0
  451. package/skills/meta-analysis/tests/test_pool_consistency.sh +123 -0
  452. package/skills/orchestrate/SKILL.md +501 -0
  453. package/skills/orchestrate/references/dialogue_nodes.md +196 -0
  454. package/skills/orchestrate/references/report_template.md +109 -0
  455. package/skills/orchestrate/references/report_template_ko.md +88 -0
  456. package/skills/orchestrate/skill.yml +44 -0
  457. package/skills/peer-review/SKILL.md +381 -0
  458. package/skills/peer-review/references/aczel_2021_reviewer2_patterns.md +88 -0
  459. package/skills/peer-review/references/domain-probes/ai_overclaiming.md +47 -0
  460. package/skills/peer-review/references/domain-probes/narrative_review.md +44 -0
  461. package/skills/peer-review/references/domain-probes/observational_confounding.md +48 -0
  462. package/skills/peer-review/references/domain-probes/radiomics.md +38 -0
  463. package/skills/peer-review/references/domain-probes/sr_ma.md +87 -0
  464. package/skills/peer-review/references/domain-probes/survival_prognostic.md +68 -0
  465. package/skills/peer-review/references/exemplar_reviews/README.md +43 -0
  466. package/skills/peer-review/references/exemplar_reviews/ai_overclaiming.md +47 -0
  467. package/skills/peer-review/references/exemplar_reviews/calibration_missing.md +44 -0
  468. package/skills/peer-review/references/exemplar_reviews/data_leakage.md +48 -0
  469. package/skills/peer-review/references/exemplar_reviews/reference_standard_validity.md +45 -0
  470. package/skills/peer-review/references/narrative_review_audit.md +67 -0
  471. package/skills/peer-review/references/reviewer_calibration/README.md +34 -0
  472. package/skills/peer-review/references/reviewer_calibration/compliance_floor.md +52 -0
  473. package/skills/peer-review/references/reviewer_profiles/AJR.md +82 -0
  474. package/skills/peer-review/references/reviewer_profiles/EURE.md +64 -0
  475. package/skills/peer-review/references/reviewer_profiles/INSI.md +57 -0
  476. package/skills/peer-review/references/reviewer_profiles/KJR.md +100 -0
  477. package/skills/peer-review/references/reviewer_profiles/README.md +32 -0
  478. package/skills/peer-review/references/reviewer_profiles/RYAI.md +86 -0
  479. package/skills/peer-review/skill.yml +39 -0
  480. package/skills/present-paper/SKILL.md +675 -0
  481. package/skills/present-paper/references/critic_rubrics/slide.md +155 -0
  482. package/skills/present-paper/references/generate_pptx_templates.py +604 -0
  483. package/skills/present-paper/references/medical_presentation_templates.md +277 -0
  484. package/skills/present-paper/references/slide_design_principles.md +202 -0
  485. package/skills/present-paper/references/slide_visual_styles/nature_lancet.md +168 -0
  486. package/skills/present-paper/references/workflow-checklist.md +109 -0
  487. package/skills/present-paper/scripts/extract_pdf_figures.py +243 -0
  488. package/skills/present-paper/scripts/inject_pronunciation_notes.py +178 -0
  489. package/skills/present-paper/scripts/inject_speaker_notes.py +133 -0
  490. package/skills/present-paper/scripts/strip_notes_for_sharing.py +140 -0
  491. package/skills/present-paper/scripts/trim_caption.py +271 -0
  492. package/skills/present-paper/skill.yml +41 -0
  493. package/skills/present-paper/templates/build_pptx_nature_lancet.py +688 -0
  494. package/skills/publish-skill/SKILL.md +370 -0
  495. package/skills/publish-skill/references/license-compatibility-matrix.md +132 -0
  496. package/skills/publish-skill/references/pii-patterns.md +130 -0
  497. package/skills/publish-skill/scripts/audit_skill.sh +278 -0
  498. package/skills/publish-skill/skill.yml +35 -0
  499. package/skills/render-pdf-doc/SKILL.md +146 -0
  500. package/skills/render-pdf-doc/references/known_pitfalls.md +53 -0
  501. package/skills/render-pdf-doc/references/pandoc_korean_cheatsheet.md +77 -0
  502. package/skills/render-pdf-doc/scripts/check_deps.sh +42 -0
  503. package/skills/render-pdf-doc/scripts/infer_colwidths.py +164 -0
  504. package/skills/render-pdf-doc/scripts/render_pdf.sh +98 -0
  505. package/skills/render-pdf-doc/skill.yml +57 -0
  506. package/skills/render-pdf-doc/templates/anchor-doc.md +27 -0
  507. package/skills/render-pdf-doc/templates/anchor-doc_ko.md +25 -0
  508. package/skills/render-pdf-doc/templates/briefing-handout.md +33 -0
  509. package/skills/render-pdf-doc/templates/briefing-handout_ko.md +31 -0
  510. package/skills/render-pdf-doc/templates/proposal-cover.md +33 -0
  511. package/skills/render-pdf-doc/templates/proposal-cover_ko.md +31 -0
  512. package/skills/render-pdf-doc/templates/reference-table.md +22 -0
  513. package/skills/render-pdf-doc/templates/reference-table_ko.md +20 -0
  514. package/skills/replicate-study/SKILL.md +150 -0
  515. package/skills/replicate-study/references/harmonization_3country.csv +47 -0
  516. package/skills/replicate-study/references/harmonization_knhanes_nhanes.csv +68 -0
  517. package/skills/replicate-study/references/methodology_extraction_template.md +134 -0
  518. package/skills/replicate-study/skill.yml +37 -0
  519. package/skills/review-paper/SKILL.md +104 -0
  520. package/skills/review-paper/references/macro_skeleton.md +6 -0
  521. package/skills/review-paper/skill.yml +25 -0
  522. package/skills/revise/SKILL.md +515 -0
  523. package/skills/revise/references/r2r_voice.md +346 -0
  524. package/skills/revise/skill.yml +43 -0
  525. package/skills/search-lit/SKILL.md +443 -0
  526. package/skills/search-lit/references/parse_pubmed.py +326 -0
  527. package/skills/search-lit/references/pubmed_eutils.sh +111 -0
  528. package/skills/search-lit/skill.yml +46 -0
  529. package/skills/self-review/SKILL.md +1045 -0
  530. package/skills/self-review/references/domain-probes/ai_overclaiming.md +47 -0
  531. package/skills/self-review/references/domain-probes/narrative_review.md +44 -0
  532. package/skills/self-review/references/domain-probes/observational_confounding.md +48 -0
  533. package/skills/self-review/references/domain-probes/radiomics.md +38 -0
  534. package/skills/self-review/references/domain-probes/sr_ma.md +87 -0
  535. package/skills/self-review/references/domain-probes/survival_prognostic.md +68 -0
  536. package/skills/self-review/references/exemplar_findings/README.md +43 -0
  537. package/skills/self-review/references/exemplar_findings/cohort_arithmetic_mismatch.md +35 -0
  538. package/skills/self-review/references/exemplar_findings/estimand_drift_posthoc_primary.md +39 -0
  539. package/skills/self-review/references/exemplar_findings/scope_overreach_cross_sectional.md +35 -0
  540. package/skills/self-review/references/exemplar_findings/unadjusted_confounder.md +36 -0
  541. package/skills/self-review/references/panel_review_template.md +177 -0
  542. package/skills/self-review/scripts/check_artifact_coverage.py +301 -0
  543. package/skills/self-review/scripts/check_claim_artifact.py +248 -0
  544. package/skills/self-review/scripts/check_classical_style.py +185 -0
  545. package/skills/self-review/scripts/check_cohort_arithmetic.py +481 -0
  546. package/skills/self-review/scripts/check_confounding_completeness.py +287 -0
  547. package/skills/self-review/scripts/check_panel_diversity.py +336 -0
  548. package/skills/self-review/scripts/check_reference_adequacy.py +392 -0
  549. package/skills/self-review/scripts/check_reviewer_team_consistency.py +412 -0
  550. package/skills/self-review/scripts/check_scope_coherence.py +177 -0
  551. package/skills/self-review/skill.yml +47 -0
  552. package/skills/self-review/tests/fixtures/claim_manuscript.md +17 -0
  553. package/skills/self-review/tests/fixtures/claim_prereg.md +6 -0
  554. package/skills/self-review/tests/fixtures/cohort_bad.md +21 -0
  555. package/skills/self-review/tests/fixtures/cohort_clean.md +21 -0
  556. package/skills/self-review/tests/fixtures/cohort_partition.csv +5 -0
  557. package/skills/self-review/tests/fixtures/coverage_analysis/31_delong_nested_added_value.csv +3 -0
  558. package/skills/self-review/tests/fixtures/coverage_analysis/table1_demographics.csv +3 -0
  559. package/skills/self-review/tests/fixtures/coverage_clean.md +13 -0
  560. package/skills/self-review/tests/fixtures/coverage_manuscript.md +11 -0
  561. package/skills/self-review/tests/fixtures/panel_collapse.json +27 -0
  562. package/skills/self-review/tests/fixtures/panel_good.json +32 -0
  563. package/skills/self-review/tests/fixtures/panel_monoculture.json +32 -0
  564. package/skills/self-review/tests/fixtures/refadeq_letter.md +13 -0
  565. package/skills/self-review/tests/fixtures/refadeq_original_fixed.md +42 -0
  566. package/skills/self-review/tests/fixtures/refadeq_original_uncited.md +40 -0
  567. package/skills/self-review/tests/fixtures/scope_bad.md +9 -0
  568. package/skills/self-review/tests/fixtures/scope_clean.md +8 -0
  569. package/skills/self-review/tests/fixtures/scope_surrogate.md +8 -0
  570. package/skills/self-review/tests/fixtures/style_bad.md +13 -0
  571. package/skills/self-review/tests/fixtures/style_clean.md +11 -0
  572. package/skills/self-review/tests/fixtures/table1_by_exposure.csv +11 -0
  573. package/skills/self-review/tests/test_artifact_coverage.sh +44 -0
  574. package/skills/self-review/tests/test_claim_artifact.sh +50 -0
  575. package/skills/self-review/tests/test_classical_style.sh +44 -0
  576. package/skills/self-review/tests/test_cohort_arithmetic.sh +49 -0
  577. package/skills/self-review/tests/test_confounding_completeness.sh +66 -0
  578. package/skills/self-review/tests/test_panel_diversity.sh +55 -0
  579. package/skills/self-review/tests/test_panel_mode.sh +69 -0
  580. package/skills/self-review/tests/test_reference_adequacy.sh +68 -0
  581. package/skills/self-review/tests/test_reviewer_team_consistency.sh +138 -0
  582. package/skills/self-review/tests/test_scope_coherence.sh +46 -0
  583. package/skills/setup-medsci/SKILL.md +110 -0
  584. package/skills/setup-medsci/references/setup-checklist.md +51 -0
  585. package/skills/setup-medsci/skill.yml +30 -0
  586. package/skills/sync-submission/SKILL.md +382 -0
  587. package/skills/sync-submission/scripts/author_registry_example.yaml +36 -0
  588. package/skills/sync-submission/scripts/blind_sweep.py +203 -0
  589. package/skills/sync-submission/scripts/check_asset_anonymization.py +300 -0
  590. package/skills/sync-submission/scripts/check_cross_artifact_stale.py +211 -0
  591. package/skills/sync-submission/scripts/cover_letter_drift_check.py +451 -0
  592. package/skills/sync-submission/scripts/cross_document_n_check.py +486 -0
  593. package/skills/sync-submission/scripts/detect_copy_divergence.py +136 -0
  594. package/skills/sync-submission/scripts/preflight_gate.py +458 -0
  595. package/skills/sync-submission/scripts/scope_drift_check.py +362 -0
  596. package/skills/sync-submission/scripts/sync_submission.py +169 -0
  597. package/skills/sync-submission/skill.yml +43 -0
  598. package/skills/sync-submission/tests/fixtures/copy_ok.md +5 -0
  599. package/skills/sync-submission/tests/fixtures/copy_stale.md +5 -0
  600. package/skills/sync-submission/tests/fixtures/ssot.md +5 -0
  601. package/skills/sync-submission/tests/test_asset_anonymization.sh +99 -0
  602. package/skills/sync-submission/tests/test_copy_divergence.sh +44 -0
  603. package/skills/sync-submission/tests/test_cross_artifact_stale.sh +80 -0
  604. package/skills/sync-submission/tests/test_cross_document_n.sh +132 -0
  605. package/skills/sync-submission/tests/test_preflight_gate.sh +112 -0
  606. package/skills/sync-submission/tests/test_scope_drift.sh +122 -0
  607. package/skills/sync-submission/tests/test_vN_docx_assertion.sh +51 -0
  608. package/skills/verify-refs/SKILL.md +177 -0
  609. package/skills/verify-refs/references/manual_checkpoint_guide.md +100 -0
  610. package/skills/verify-refs/scripts/verify_cli.sh +62 -0
  611. package/skills/verify-refs/scripts/verify_refs.py +782 -0
  612. package/skills/verify-refs/skill.yml +44 -0
  613. package/skills/verify-refs/tests/fixtures/pagination_placeholder.bib +17 -0
  614. package/skills/verify-refs/tests/test_pagination_placeholder.sh +42 -0
  615. package/skills/version-dataset/SKILL.md +143 -0
  616. package/skills/version-dataset/references/manifest_schema.md +72 -0
  617. package/skills/version-dataset/scripts/version_dataset.py +242 -0
  618. package/skills/version-dataset/skill.yml +35 -0
  619. package/skills/version-dataset/tests/test_version_dataset.sh +52 -0
  620. package/skills/write-paper/SKILL.md +1148 -0
  621. package/skills/write-paper/references/exemplar_methods/README.md +38 -0
  622. package/skills/write-paper/references/exemplar_methods/ai_validation_tripod_claim.md +47 -0
  623. package/skills/write-paper/references/exemplar_methods/diagnostic_accuracy_stard.md +50 -0
  624. package/skills/write-paper/references/exemplar_methods/observational_cohort_strobe.md +43 -0
  625. package/skills/write-paper/references/journal_profiles/AJNR.md +185 -0
  626. package/skills/write-paper/references/journal_profiles/AJR.md +149 -0
  627. package/skills/write-paper/references/journal_profiles/Abdominal_Radiology.md +139 -0
  628. package/skills/write-paper/references/journal_profiles/Academic_Radiology.md +90 -0
  629. package/skills/write-paper/references/journal_profiles/Annals_of_Internal_Medicine.md +150 -0
  630. package/skills/write-paper/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +82 -0
  631. package/skills/write-paper/references/journal_profiles/British_Journal_of_Radiology.md +161 -0
  632. package/skills/write-paper/references/journal_profiles/CVIR.md +157 -0
  633. package/skills/write-paper/references/journal_profiles/Chest.md +270 -0
  634. package/skills/write-paper/references/journal_profiles/Clinical_Radiology.md +160 -0
  635. package/skills/write-paper/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +147 -0
  636. package/skills/write-paper/references/journal_profiles/Diabetes_Metabolism_Journal.md +163 -0
  637. package/skills/write-paper/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +216 -0
  638. package/skills/write-paper/references/journal_profiles/Endocrinology_and_Metabolism.md +167 -0
  639. package/skills/write-paper/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +192 -0
  640. package/skills/write-paper/references/journal_profiles/European_Radiology.md +159 -0
  641. package/skills/write-paper/references/journal_profiles/Hepatology_Communications.md +110 -0
  642. package/skills/write-paper/references/journal_profiles/Hepatology_International.md +106 -0
  643. package/skills/write-paper/references/journal_profiles/IEEE_TMI.md +180 -0
  644. package/skills/write-paper/references/journal_profiles/INSI.md +163 -0
  645. package/skills/write-paper/references/journal_profiles/Investigative_Radiology.md +86 -0
  646. package/skills/write-paper/references/journal_profiles/JACC_Advances.md +197 -0
  647. package/skills/write-paper/references/journal_profiles/JACC_Asia.md +168 -0
  648. package/skills/write-paper/references/journal_profiles/JACR.md +87 -0
  649. package/skills/write-paper/references/journal_profiles/JAMA.md +188 -0
  650. package/skills/write-paper/references/journal_profiles/JAMA_Network_Open.md +170 -0
  651. package/skills/write-paper/references/journal_profiles/JCSM.md +266 -0
  652. package/skills/write-paper/references/journal_profiles/JKMS.md +201 -0
  653. package/skills/write-paper/references/journal_profiles/JMIR.md +88 -0
  654. package/skills/write-paper/references/journal_profiles/JMIR_Medical_Education.md +86 -0
  655. package/skills/write-paper/references/journal_profiles/JNIS.md +227 -0
  656. package/skills/write-paper/references/journal_profiles/JVIR.md +158 -0
  657. package/skills/write-paper/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +191 -0
  658. package/skills/write-paper/references/journal_profiles/Journal_of_Stroke.md +176 -0
  659. package/skills/write-paper/references/journal_profiles/KJR.md +185 -0
  660. package/skills/write-paper/references/journal_profiles/Korean_Circulation_Journal.md +184 -0
  661. package/skills/write-paper/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +178 -0
  662. package/skills/write-paper/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +127 -0
  663. package/skills/write-paper/references/journal_profiles/Liver_International.md +165 -0
  664. package/skills/write-paper/references/journal_profiles/Medical_Image_Analysis.md +147 -0
  665. package/skills/write-paper/references/journal_profiles/NEJM.md +147 -0
  666. package/skills/write-paper/references/journal_profiles/Nature_Medicine.md +181 -0
  667. package/skills/write-paper/references/journal_profiles/Neuroradiology.md +151 -0
  668. package/skills/write-paper/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +184 -0
  669. package/skills/write-paper/references/journal_profiles/PLOS_Medicine.md +166 -0
  670. package/skills/write-paper/references/journal_profiles/RYAI.md +124 -0
  671. package/skills/write-paper/references/journal_profiles/Radiology.md +173 -0
  672. package/skills/write-paper/references/journal_profiles/Skeletal_Radiology.md +135 -0
  673. package/skills/write-paper/references/journal_profiles/Stroke.md +210 -0
  674. package/skills/write-paper/references/journal_profiles/The_BMJ.md +121 -0
  675. package/skills/write-paper/references/journal_profiles/The_Lancet.md +112 -0
  676. package/skills/write-paper/references/journal_profiles/The_Lancet_Digital_Health.md +104 -0
  677. package/skills/write-paper/references/journal_profiles/World_Journal_of_Hepatology.md +106 -0
  678. package/skills/write-paper/references/journal_profiles/npj_Digital_Medicine.md +93 -0
  679. package/skills/write-paper/references/paper_types/ai_validation.md +270 -0
  680. package/skills/write-paper/references/paper_types/animal_study.md +194 -0
  681. package/skills/write-paper/references/paper_types/case_report.md +237 -0
  682. package/skills/write-paper/references/paper_types/cross_national.md +328 -0
  683. package/skills/write-paper/references/paper_types/letter.md +127 -0
  684. package/skills/write-paper/references/paper_types/meta_analysis.md +181 -0
  685. package/skills/write-paper/references/paper_types/nhis_cohort.md +297 -0
  686. package/skills/write-paper/references/paper_types/original_article.md +221 -0
  687. package/skills/write-paper/references/paper_types/technical_note.md +131 -0
  688. package/skills/write-paper/references/section_guides/discussion.md +155 -0
  689. package/skills/write-paper/references/section_guides/introduction.md +108 -0
  690. package/skills/write-paper/references/section_guides/methods.md +144 -0
  691. package/skills/write-paper/references/section_guides/results.md +113 -0
  692. package/skills/write-paper/references/section_guides/step7_1_classical_qc.md +67 -0
  693. package/skills/write-paper/references/section_guides/step7_4a_audit_recovery.md +74 -0
  694. package/skills/write-paper/references/section_guides/title_abstract.md +123 -0
  695. package/skills/write-paper/references/section_templates/methods_statistical.md +147 -0
  696. package/skills/write-paper/scripts/check_placeholders.py +182 -0
  697. package/skills/write-paper/skill.yml +48 -0
  698. package/skills/write-paper/tests/test_placeholders.sh +107 -0
  699. package/skills/write-protocol/SKILL.md +243 -0
  700. package/skills/write-protocol/references/ethics_checklist.md +150 -0
  701. package/skills/write-protocol/references/protocol_template.md +304 -0
  702. package/skills/write-protocol/skill.yml +34 -0
@@ -0,0 +1,155 @@
1
+ ---
2
+ name: generate-codebook
3
+ description: Generate a citable data dictionary / codebook from a tabular dataset (CSV/TSV/Excel/Parquet/Stata/SAS). Profiles every variable — role, type, units placeholder, level frequencies, range/quantiles, missingness — and emits codebook.md + codebook.json. Flags coded variables whose level meanings are unknown as [NEEDS DICTIONARY] rather than guessing them, feeding /define-variables and the dictionary-first workflow.
4
+ triggers: generate codebook, data dictionary, codebook, profile variables, variable dictionary, describe dataset, what variables, column dictionary, build codebook
5
+ tools: Read, Write, Edit, Bash, Grep, Glob
6
+ model: inherit
7
+ ---
8
+
9
+ # Generate Codebook Skill
10
+
11
+ You help a medical researcher turn a raw tabular dataset into a structured,
12
+ **citable** data dictionary (codebook). This is the *generator* side of the
13
+ dictionary-first workflow: it produces the artifact that `/define-variables` and
14
+ dictionary-first QC later consume. You generate code and review output — you do
15
+ **not** invent the meaning of coded values.
16
+
17
+ ## Communication Rules
18
+
19
+ - Communicate with the user in their preferred language.
20
+ - Variable names, codebook fields, and report output are in English.
21
+ - Medical terminology is always in English.
22
+
23
+ ## Philosophy
24
+
25
+ A codebook describes *what is in the data*, not *what the codes mean*. Column
26
+ distributions, types, and missingness are observable and safe to profile. The
27
+ **meaning** of a coded value (`fatty_liver_grade = 0`) is NOT observable from the
28
+ data — it lives in the authoritative data dictionary. This skill profiles the
29
+ former deterministically and explicitly flags the latter as `[NEEDS DICTIONARY]`
30
+ so a human fills it from the source. This is the generator counterpart to the
31
+ dictionary-first rule that `/define-variables` enforces on consumption.
32
+
33
+ ## Reference Files
34
+
35
+ - **Schema + role rules**: `${CLAUDE_SKILL_DIR}/references/codebook_schema.md` — the
36
+ codebook.json schema, the role-inference heuristics, and how the output threads
37
+ into `/define-variables` and dictionary-first QC. Read this before interpreting output.
38
+
39
+ ## Deterministic Script
40
+
41
+ Run the bundled profiler rather than describing columns from memory:
42
+
43
+ ```bash
44
+ python "${CLAUDE_SKILL_DIR}/scripts/generate_codebook.py" data.csv --out-dir .
45
+ ```
46
+
47
+ Supports `.csv/.tsv/.xlsx/.parquet/.dta/.sas7bdat`. Flags: `--max-levels N`
48
+ (categorical cutoff, default 20), `--json-only`, `--md-only`. The script is
49
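+ pandas-only (Excel and Parquet inputs additionally need a pandas reader engine such as `openpyxl` or `pyarrow`), runs locally, and never sends data anywhere.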
50
+
51
+ ## Workflow
52
+
53
+ ### Step 1: Profile (deterministic)
54
+
55
+ Run `generate_codebook.py` on the dataset. It writes `codebook.json`
56
+ (machine-readable) and `codebook.md` (review table), reporting per variable: role
57
+ (id / continuous / categorical / binary / date / text), dtype, missingness,
58
+ unique count, level frequencies or quantile summary, and a `needs_dictionary` flag.
59
+
60
+ ### Step 2: Review with the researcher (gate)
61
+
62
+ Present `codebook.md` and walk the user through it. **Gate:** the user confirms
63
+ or corrects the inferred roles (e.g., an integer-coded scale misread as continuous,
64
+ or a misclassified id column). Do not proceed to definition work until the user approves the role
65
+ assignments.
66
+
67
+ ### Step 3: Resolve [NEEDS DICTIONARY] items (gate)
68
+
69
+ For every variable flagged `needs_dictionary: true`, the level codes are
70
+ uninterpretable without the authoritative source. **Gate:** ask the user to
71
+ supply the meaning of each code from the real data dictionary (file/sheet/row),
72
+ or to confirm none exists. Fill `label`, `units`, and per-level meanings into the
73
+ codebook **only** from that source — never from inference. If the user cannot
74
+ supply it, leave the `[NEEDS DICTIONARY]` marker in place; do not erase it.
75
+
76
+ ### Step 4: Hand off
77
+
78
+ The completed `codebook.json` becomes the input dictionary for `/define-variables`
79
+ (operationalization) and the citation source for dictionary-first QC. **Gate:**
80
+ confirm with the user that no `needs_dictionary` flags remain unresolved before
81
+ the codebook is treated as authoritative for downstream analysis.
82
+
83
+ ## Scope Limitations
84
+
85
+ ### Supported
86
+ - Tabular files: CSV, TSV, Excel, Parquet, Stata (`.dta`), SAS (`.sas7bdat`).
87
+ - Per-variable profiling, role inference, missingness, level/range summaries.
88
+
89
+ ### NOT Supported
90
+ - Inventing or guessing the meaning of coded values (that is `[NEEDS DICTIONARY]`).
91
+ - Cleaning or transforming data — use `/clean-data`.
92
+ - De-identification — use `/deidentify` before sharing.
93
+ - Operationalizing exposure/outcome definitions — use `/define-variables` (this skill feeds it).
94
+
95
+ ## Cross-Skill Integration
96
+
97
+ - **/define-variables** consumes `codebook.json` as its data dictionary input.
98
+ - **/clean-data** profiles + cleans; this skill produces a durable dictionary artifact instead.
99
+ - **/deidentify** should run on the raw data before a codebook is shared externally.
100
+
101
+ ## Output Format
102
+
103
+ `codebook.json` (schema in references) and `codebook.md` (review table with a
104
+ "Columns requiring dictionary lookup" section). Summarize the counts
105
+ (rows, columns, `needs_dictionary_count`) in chat; do not paste the full JSON.
106
+
107
+ ## Worked Example
108
+
109
+ Input `cohort.csv` (first two rows shown; the full file has more rows):
110
+
111
+ ```text
112
+ patient_id,age,sex,fatty_liver_grade,smoking_status,visit_date
113
+ 1001,54,1,0,never,2023-01-15
114
+ 1002,61,2,2,former,2023-02-03
115
+ ```
116
+
117
+ Run:
118
+
119
+ ```bash
120
+ python "${CLAUDE_SKILL_DIR}/scripts/generate_codebook.py" cohort.csv --out-dir .
121
+ # -> {"n_rows": ..., "n_columns": 6, "needs_dictionary_count": 2, "outputs": [...]}
122
+ ```
123
+
124
+ `codebook.md` (excerpt):
125
+
126
+ ```text
127
+ | Variable | Role | Missing % | Unique | Needs dictionary |
128
+ | `patient_id` | id | 0.0 | N | |
129
+ | `age` | continuous | 0.0 | ... | |
130
+ | `sex` | binary | 0.0 | 2 | ⚠️ YES |
131
+ | `fatty_liver_grade` | categorical | 0.0 | 5 | ⚠️ YES |
132
+ | `smoking_status` | categorical | 0.0 | 3 | |
133
+ | `visit_date` | date | 0.0 | ... | |
134
+ ```
135
+
136
+ `sex` and `fatty_liver_grade` are flagged because their levels are bare codes
137
+ (`1/2`, `0..4`). `smoking_status` is **not** flagged — its levels are already
138
+ human-readable. The reviewer then:
139
+
140
+ 1. Opens the project's authoritative data dictionary.
141
+ 2. Fills `sex`: `1 = male, 2 = female` and `fatty_liver_grade`: `0 = none … 4 = suspected`
142
+ into the codebook **from that source** (citing file > sheet > row).
143
+ 3. Confirms no `[NEEDS DICTIONARY]` flags remain, then hands `codebook.json` to
144
+ `/define-variables`.
145
+
146
+ What the skill must **never** do: write `sex: 1 = male` because "that is the
147
+ usual coding." If the dictionary is unavailable, the flag stays.
148
+
149
+ ## Anti-Hallucination
150
+
151
+ - Never invent a variable's label, units, or the meaning of any coded level.
152
+ - Coded categorical/binary columns with bare codes are flagged `[NEEDS DICTIONARY]`;
153
+ the meaning is filled only from the authoritative data dictionary, then cited.
154
+ - Role inference is a heuristic — surface it for user confirmation, do not assert it as ground truth.
155
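+ - The profiler reads values locally and does not itself send data to any model or network; its output `codebook.json` still records observed values (`levels`, `examples`), so treat it as a data-bearing file.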
@@ -0,0 +1,76 @@
1
+ # Codebook Schema & Role Inference
2
+
3
+ `generate_codebook.py` emits `codebook.json` (machine-readable) and `codebook.md`
4
+ (human review). This file documents the JSON schema, the role-inference rules,
5
+ and how the output threads into downstream skills.
6
+
7
+ ## codebook.json schema (schema_version 1)
8
+
9
+ ```jsonc
10
+ {
11
+ "schema_version": 1,
12
+ "source": "path/to/data.csv",
13
+ "n_rows": 200,
14
+ "n_columns": 9,
15
+ "needs_dictionary_count": 2,
16
+ "columns": [
17
+ {
18
+ "name": "fatty_liver_grade",
19
+ "role": "categorical", // id | continuous | categorical | binary | date | text
20
+ "dtype": "int64",
21
+ "n": 200,
22
+ "n_missing": 0,
23
+ "pct_missing": 0.0,
24
+ "n_unique": 5,
25
+ "label": null, // filled by researcher from the authoritative dictionary
26
+ "units": null, // filled by researcher
27
+ "needs_dictionary": true, // true => level meanings are unknown; do NOT guess
28
+ "notes": ["[NEEDS DICTIONARY] level codes are uninterpretable ..."],
29
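+ "levels": [{"value": 0, "count": 41}, {"value": 1, "count": 39}], // categorical/binary only; at most the 15 most frequent levels
30
+ "stats": {"min": 0, "q1": 1, "median": 2, "q3": 3, "max": 4}, // continuous (which also adds "mean" and "sd"); date columns carry only "min"/"max"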
31
+ "examples": ["0", "2", "1"]
32
+ }
33
+ ]
34
+ }
35
+ ```
36
+
37
+ `label`, `units`, and per-level meanings are intentionally left `null` / unlabelled.
38
+ They are filled **only** from the authoritative data dictionary, never inferred.
39
+
40
+ ## Role inference (heuristic — confirm with the user)
41
+
42
+ Decided in this order; dtype and column name dominate so that continuous
43
+ measurements are never misread as identifiers on small datasets:
44
+
45
+ 1. **date** — datetime dtype, or >80% of the first 50 non-null values parse as dates.
46
+ 2. **binary** — exactly 2 distinct non-null values.
47
+ 3. **numeric dtype:**
48
+ - **id** — integer-valued, id-like name (`*_id`, `uid`, `mrn`, `subject`, `patient`, `record`, `accession`), and unique count > `--max-levels`.
49
+ - **categorical** — integer-valued and unique count ≤ `--max-levels` (coded scale).
50
+ - **continuous** — otherwise (floats, or many distinct integers).
51
+ 4. **object/string:**
52
+ - **id** — id-like name with unique count > `--max-levels`, or all-unique on ≥50 rows.
53
+ - **categorical** — unique count ≤ `--max-levels`.
54
+ - **text** — otherwise.
55
+
56
+ `--max-levels` (default 20) is the categorical cutoff. Raise it for scales with
57
+ many levels; lower it to force more columns to `continuous`/`text`.
58
+
59
+ ## needs_dictionary flag
60
+
61
+ A categorical/binary column is flagged `needs_dictionary: true` when its levels
62
+ are **bare codes** — numbers, digit strings, or tokens of three characters or fewer (`Y`/`N`/`M`/`U`/`NA`) — i.e.,
63
+ uninterpretable without the source dictionary. A column whose levels are already
64
+ human-readable (`never` / `former` / `current`) is **not** flagged. The flag is a
65
+ prompt for the researcher to fill meanings from the authoritative dictionary; it
66
+ is never resolved by guessing.
67
+
68
+ ## Downstream integration
69
+
70
+ - **/define-variables** takes `codebook.json` as its data-dictionary input and
71
+ operationalizes exposure/outcome/covariate definitions on top of it. Unresolved
72
+ `needs_dictionary` flags should be cleared first.
73
+ - **dictionary-first QC** cites the codebook (file > variable) as the provenance
74
+ for a coded value's meaning. The codebook is the artifact that citation points at.
75
+ - **/clean-data** and **/deidentify** operate on the raw data; run `/deidentify`
76
+ before a codebook (which contains example values) is shared externally.
@@ -0,0 +1,278 @@
1
+ #!/usr/bin/env python3
2
+ """Generate a citable data dictionary / codebook from a tabular dataset.
3
+
4
+ Profiles every column of a dataset and emits two artifacts:
5
+ - codebook.json — machine-readable (consumed by /define-variables and
6
+ dictionary-first QC)
7
+ - codebook.md — human-readable table for review/sharing
8
+
9
+ Hard anti-hallucination rule: the meaning of coded values is NEVER invented. A
10
+ categorical/binary column whose levels are bare codes (0/1/2, "Y"/"N", ...) is
11
+ flagged `needs_dictionary: true` with a `[NEEDS DICTIONARY]` note, so the
12
+ researcher fills the meaning from the authoritative data dictionary rather than
13
+ the model guessing. This is the generator side of the dictionary-first rule.
14
+
15
+ Profiling is deterministic and local — pandas only, no network, no LLM touches
16
+ the values. Optional engines (openpyxl/pyarrow/Stata) are used only if present.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import json
23
+ import sys
24
+ from pathlib import Path
25
+
26
+ try:
27
+ import pandas as pd
28
+ except ImportError:
29
+ print("ERROR: pandas is required (pip install pandas).", file=sys.stderr)
30
+ sys.exit(2)
31
+
32
+
33
# Default for --max-levels: the distinct-value cutoff used by role inference
# to decide whether a column is treated as categorical.
CATEGORICAL_MAX_LEVELS_DEFAULT = 20
# Maximum number of most-frequent levels recorded per categorical/binary column.
TOP_LEVELS = 15
# Number of leading non-null values stored as "examples" for every column.
EXAMPLES = 3
36
+
37
+
38
def read_table(path: Path) -> "pd.DataFrame":
    """Load a tabular file into a DataFrame, choosing the reader by suffix.

    The suffix is matched case-insensitively. ``.tsv`` is read as
    tab-separated text; Excel, Parquet, Stata and SAS files go to the
    matching pandas reader (which may need an optional engine). Every
    other suffix, including ``.csv`` and ``.txt``, is read as CSV.
    """
    extension = path.suffix.lower()
    if extension == ".tsv":
        return pd.read_csv(path, sep="\t")

    readers = {
        ".xlsx": pd.read_excel,  # needs openpyxl/xlrd
        ".xls": pd.read_excel,
        ".parquet": pd.read_parquet,  # needs pyarrow/fastparquet
        ".pq": pd.read_parquet,
        ".dta": pd.read_stata,
        ".sas7bdat": pd.read_sas,
    }
    # Unknown suffixes fall back to CSV, the same reader used for .csv/.txt.
    loader = readers.get(extension, pd.read_csv)
    return loader(path)
54
+
55
+
56
+ def _looks_like_date(series: "pd.Series") -> bool:
57
+ if pd.api.types.is_datetime64_any_dtype(series):
58
+ return True
59
+ s = series.dropna().astype(str).head(50)
60
+ if s.empty:
61
+ return False
62
+ parsed = pd.to_datetime(s, errors="coerce", format="mixed") if hasattr(pd, "__version__") else pd.to_datetime(s, errors="coerce")
63
+ return parsed.notna().mean() > 0.8
64
+
65
+
66
+ MIN_ROWS_FOR_UNIQUENESS_ID = 50 # below this, "all-unique" is not a reliable id signal
67
+
68
+
69
+ def _is_integer_valued(series: "pd.Series") -> bool:
70
+ if pd.api.types.is_integer_dtype(series):
71
+ return True
72
+ nn = series.dropna()
73
+ if nn.empty or not pd.api.types.is_numeric_dtype(series):
74
+ return False
75
+ try:
76
+ return bool((nn.astype(float) % 1 == 0).all())
77
+ except Exception:
78
+ return False
79
+
80
+
81
def infer_role(series: "pd.Series", n_rows: int, n_unique: int, max_levels: int, name: str) -> str:
    """Classify a column as date, binary, id, categorical, continuous or text.

    Checks run in a fixed order: date first, then binary (exactly two
    distinct values), then dtype-specific rules. Numeric columns become
    ``id`` only when they are integer-valued, id-named and exceed
    ``max_levels`` distinct values, so float measurements and small
    all-unique columns are not mistaken for identifiers. The result is a
    heuristic to be confirmed by the user.
    """
    lowered = name.lower()
    id_markers = ("_id", "id_", "uid", "mrn", "subject", "patient", "record", "accession")
    id_name = lowered == "id" or any(marker in lowered for marker in id_markers)

    if _looks_like_date(series):
        return "date"
    if n_unique == 2:
        return "binary"

    exceeds_cutoff = n_unique > max_levels

    if pd.api.types.is_numeric_dtype(series):
        if not _is_integer_valued(series):
            return "continuous"
        if not exceeds_cutoff:
            return "categorical"  # small set of integer codes
        # Many distinct integers: an identifier only if the name says so.
        return "id" if id_name else "continuous"

    # object / string columns
    if id_name and exceeds_cutoff:
        return "id"
    if n_unique == n_rows and n_rows >= MIN_ROWS_FOR_UNIQUENESS_ID:
        return "id"
    return "text" if exceeds_cutoff else "categorical"
115
+
116
+
117
+ def _coded_levels(series: "pd.Series") -> bool:
118
+ """True when the categorical/binary levels are bare codes needing a dictionary."""
119
+ vals = series.dropna().unique().tolist()
120
+ if not vals:
121
+ return False
122
+ for v in vals:
123
+ if pd.api.types.is_number(v):
124
+ continue
125
+ s = str(v).strip()
126
+ # short tokens / pure codes look uninterpretable without a dictionary
127
+ if len(s) <= 3 or s.isdigit() or s.upper() in ("Y", "N", "T", "F", "M", "U", "NA"):
128
+ continue
129
+ return False # at least one human-readable label present
130
+ return True
131
+
132
+
133
def profile_column(df: "pd.DataFrame", col: str, n_rows: int, max_levels: int) -> dict:
    """Profile one column and return its codebook record.

    Args:
        df: The dataset being documented.
        col: Name of the column to profile.
        n_rows: Total row count of ``df`` (denominator for missingness).
        max_levels: Categorical cutoff forwarded to ``infer_role``.

    Returns:
        A dict with the column name, inferred role, dtype, missingness,
        unique count, role-specific ``stats`` or ``levels``, free-text
        ``notes`` and a few example values. ``label`` and ``units`` are
        always ``None``: they are filled by the researcher from the
        authoritative data dictionary, never inferred here.
    """

    def _level_value(value):
        # Keep JSON output tidy: whole numbers as int, other numbers as float,
        # everything else as its string form.
        if pd.api.types.is_number(value):
            return int(value) if float(value).is_integer() else float(value)
        return str(value)

    s = df[col]
    n_missing = int(s.isna().sum())
    nonnull = s.dropna()
    n_unique = int(nonnull.nunique())
    role = infer_role(s, n_rows, n_unique, max_levels, col)

    rec: dict = {
        "name": col,
        "role": role,
        "dtype": str(s.dtype),
        "n": int(n_rows),
        "n_missing": n_missing,
        "pct_missing": round(100.0 * n_missing / n_rows, 2) if n_rows else 0.0,
        "n_unique": n_unique,
        "label": None,  # to be filled by researcher from the real dictionary
        "units": None,  # to be filled by researcher
        "needs_dictionary": False,
        "notes": [],
    }

    if role == "continuous":
        desc = nonnull.astype(float)
        if not desc.empty:
            rec["stats"] = {
                "min": float(desc.min()),
                "q1": float(desc.quantile(0.25)),
                "median": float(desc.median()),
                "q3": float(desc.quantile(0.75)),
                "max": float(desc.max()),
                "mean": round(float(desc.mean()), 4),
                # A single observation has no sample standard deviation.
                "sd": round(float(desc.std()), 4) if len(desc) > 1 else 0.0,
            }
        rec["notes"].append("[NEEDS DICTIONARY] confirm units and measurement method")
    elif role in ("categorical", "binary"):
        vc = nonnull.value_counts().head(TOP_LEVELS)
        rec["levels"] = [{"value": _level_value(v), "count": int(c)} for v, c in vc.items()]
        if _coded_levels(nonnull):
            rec["needs_dictionary"] = True
            rec["notes"].append("[NEEDS DICTIONARY] level codes are uninterpretable without the authoritative data dictionary — do not guess meanings")
        if n_unique > len(vc):
            # The level list is capped at TOP_LEVELS; say so explicitly so the
            # reader does not mistake a partial list for the full set of codes.
            rec["notes"].append(
                f"levels truncated: only the {len(vc)} most frequent of {n_unique} distinct values are listed"
            )
    elif role == "date":
        try:
            d = pd.to_datetime(nonnull, errors="coerce")
            rec["stats"] = {"min": str(d.min()), "max": str(d.max())}
        except Exception as exc:
            # Best effort: the range is optional, but record why it is absent
            # instead of dropping it silently.
            rec["notes"].append(f"date range could not be computed ({type(exc).__name__})")
        rec["notes"].append("[NEEDS DICTIONARY] confirm whether this is event / measurement / enrollment date")
    elif role == "id":
        rec["notes"].append("identifier candidate (high/maximal cardinality) — exclude from analysis variables")

    examples = nonnull.head(EXAMPLES).tolist()
    rec["examples"] = [str(x) for x in examples]
    return rec
185
+
186
+
187
def build_codebook(df: "pd.DataFrame", source: str, max_levels: int) -> dict:
    """Assemble the full codebook for a dataset.

    Profiles each column of ``df`` in order and wraps the per-column
    records with dataset-level metadata: schema version, source path, row
    and column counts, and how many columns are flagged as needing the
    data dictionary.
    """
    row_total = len(df)
    profiles = []
    flagged = 0
    for column in df.columns:
        entry = profile_column(df, column, row_total, max_levels)
        profiles.append(entry)
        if entry["needs_dictionary"]:
            flagged += 1

    codebook = {
        "schema_version": 1,
        "source": source,
        "n_rows": row_total,
        "n_columns": len(df.columns),
        "needs_dictionary_count": flagged,
        "columns": profiles,
    }
    return codebook
198
+
199
+
200
def render_md(cb: dict) -> str:
    """Render a codebook dict as a Markdown report.

    The report has a short header (file name, row/column counts, number of
    flagged columns), one table row per column with a role-dependent
    summary, and, when any column is flagged, a closing list of the columns
    whose meanings must be looked up in the authoritative data dictionary.
    """

    def _summarise(column: dict) -> str:
        # One-cell summary whose content depends on the column's role.
        role = column["role"]
        if role == "continuous" and "stats" in column:
            st = column["stats"]
            return f"median {st['median']} [{st['q1']}–{st['q3']}], range {st['min']}–{st['max']}"
        if role in ("categorical", "binary") and column.get("levels"):
            shown = [f"{lvl['value']}={lvl['count']}" for lvl in column["levels"][:6]]
            if len(column["levels"]) > 6:
                shown.append("…")  # more levels exist than the six displayed
            return ", ".join(shown)
        if role == "date" and "stats" in column:
            st = column["stats"]
            return f"{st.get('min','?')} → {st.get('max','?')}"
        return ", ".join(column.get("examples", [])[:3])

    out = [
        f"# Codebook — {Path(cb['source']).name}",
        "",
        f"- Rows: {cb['n_rows']}",
        f"- Columns: {cb['n_columns']}",
        f"- Columns needing a data dictionary: **{cb['needs_dictionary_count']}**",
        "",
        "> `[NEEDS DICTIONARY]` rows require the meaning to be filled from the "
        "authoritative data dictionary. Meanings were **not** guessed.",
        "",
        "| Variable | Role | Dtype | Missing % | Unique | Summary | Needs dictionary |",
        "|---|---|---|---|---|---|---|",
    ]

    flagged = []
    for column in cb["columns"]:
        # Escape pipes so a value cannot break the Markdown table layout.
        summary = _summarise(column).replace("|", "\\|")
        if column["needs_dictionary"]:
            marker = "⚠️ YES"
            flagged.append(column["name"])
        else:
            marker = ""
        out.append(
            f"| `{column['name']}` | {column['role']} | {column['dtype']} | {column['pct_missing']} | {column['n_unique']} | {summary} | {marker} |"
        )

    if flagged:
        out.extend(["", "## Columns requiring dictionary lookup", ""])
        out.extend(
            f"- `{name}` — fill level meanings + units from the authoritative data dictionary, then cite per the dictionary-first rule before use in /define-variables."
            for name in flagged
        )
    out.append("")
    return "\n".join(out)
236
+
237
+
238
def main() -> int:
    """Command-line entry point: profile a dataset and write the codebook files.

    Reads the dataset named on the command line, builds the codebook, and
    writes ``codebook.json`` and/or ``codebook.md`` into ``--out-dir``
    (created if missing). A JSON summary listing the files actually written
    is printed to stdout.

    Returns:
        0 on success, 2 when the input file is missing or cannot be read.
        Invalid command lines are rejected by argparse.
    """
    ap = argparse.ArgumentParser(description="Generate a data dictionary / codebook from a tabular dataset.")
    ap.add_argument("data", help="Path to .csv/.tsv/.xlsx/.parquet/.dta/.sas7bdat")
    ap.add_argument("--out-dir", default=".", help="Output directory (default: cwd)")
    ap.add_argument("--max-levels", type=int, default=CATEGORICAL_MAX_LEVELS_DEFAULT,
                    help=f"Max distinct values to treat a column as categorical (default {CATEGORICAL_MAX_LEVELS_DEFAULT})")
    # The two "only" switches contradict each other. Passing both used to
    # write nothing while still exiting 0, so argparse now rejects the pair.
    only = ap.add_mutually_exclusive_group()
    only.add_argument("--json-only", action="store_true", help="Write only codebook.json")
    only.add_argument("--md-only", action="store_true", help="Write only codebook.md")
    args = ap.parse_args()

    data_path = Path(args.data)
    if not data_path.exists():
        print(f"ERROR: data file not found: {data_path}", file=sys.stderr)
        return 2

    try:
        df = read_table(data_path)
    except Exception as e:
        # Top-level boundary: any reader failure (bad format, missing optional
        # engine, ...) becomes a clean error message and exit code.
        print(f"ERROR: could not read {data_path}: {e}", file=sys.stderr)
        return 2

    cb = build_codebook(df, str(data_path), args.max_levels)
    out = Path(args.out_dir)
    out.mkdir(parents=True, exist_ok=True)

    outputs = []
    if not args.md_only:
        json_path = out / "codebook.json"
        json_path.write_text(json.dumps(cb, indent=2, ensure_ascii=False), encoding="utf-8")
        outputs.append(str(json_path))
    if not args.json_only:
        md_path = out / "codebook.md"
        md_path.write_text(render_md(cb), encoding="utf-8")
        outputs.append(str(md_path))

    print(json.dumps({
        "n_rows": cb["n_rows"],
        "n_columns": cb["n_columns"],
        "needs_dictionary_count": cb["needs_dictionary_count"],
        "outputs": outputs,
    }, indent=2))
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,35 @@
1
+ schema_version: 2
2
+ name: generate-codebook
3
+ layer: A
4
+ owner_domain: data_documentation
5
+
6
+ when_to_use: "Generate a data dictionary and codebook from a dataset."
7
+ when_NOT_to_use: "Cleaning the data (use clean-data); versioning the dataset (use version-dataset)."
8
+
9
+ inputs:
10
+ - "analysis dataset (CSV/Excel)"
11
+ outputs:
12
+ - "data dictionary"
13
+ - "codebook"
14
+ deterministic_scripts:
15
+ - scripts/generate_codebook.py
16
+ side_effects:
17
+ - writes_codebook_artifacts
18
+ downstream_consumers:
19
+ - define-variables
20
+ - version-dataset
21
+ forbidden_actions:
22
+ - fabricate_variable_descriptions
23
+ - infer_units_not_present_in_data
24
+
25
+ # v2.1 quality card
26
+ purpose: "Derive a structured codebook (variables, types, ranges, missingness) directly from a dataset."
27
+ safety_boundaries:
28
+ - "Descriptive statistics are computed from the data by the bundled script, not asserted."
29
+ - "Variable semantics not present in the data are left blank for the researcher, not invented."
30
+ known_limitations:
31
+ - "Computes structure and distributions; does not supply clinical meaning of variables."
32
+ - "Free-text/semantic descriptions require researcher input."
33
+ validation_commands:
34
+ - "python3 scripts/generate_codebook.py <dataset>"
35
+ evidence_surface: bundled_script
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env bash
2
+ # Regression tests for generate-codebook/scripts/generate_codebook.py.
3
+ # Self-contained: builds a synthetic dataset (no committed data) and asserts
4
+ # role inference, the needs_dictionary flag, and the no-hallucination invariant.
5
+
6
+ set -uo pipefail
7
+
8
+ REPO_ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
9
+ SCRIPT="$REPO_ROOT/skills/generate-codebook/scripts/generate_codebook.py"
10
+ TMP="$(mktemp -d -t codebook.XXXXXX)"
11
+ trap 'rm -rf "$TMP"' EXIT
12
+
13
+ [[ -f "$SCRIPT" ]] || { echo "ENV-ERR: script missing" >&2; exit 2; }
14
+ python3 -c "import pandas, numpy" 2>/dev/null || { echo "SKIP: pandas/numpy not installed"; exit 0; }
15
+
16
+ # Build a realistic synthetic dataset (seeded, no real data).
17
+ python3 - "$TMP" <<'PY'
18
+ import sys, numpy as np, pandas as pd
19
+ out = sys.argv[1]
20
+ rng = np.random.default_rng(42); n = 200
21
+ pd.DataFrame({
22
+ "patient_id": np.arange(10001, 10001+n),
23
+ "age": rng.integers(30, 85, n),
24
+ "sex": rng.integers(1, 3, n), # coded -> needs_dictionary
25
+ "fatty_liver_grade": rng.integers(0, 5, n), # coded -> needs_dictionary
26
+ "bmi": rng.normal(25, 3, n).round(1), # continuous
27
+ "visit_date": pd.to_datetime("2023-01-01") + pd.to_timedelta(rng.integers(0,365,n), unit="D"),
28
+ "smoking_status": rng.choice(["never","former","current"], n), # labelled -> NOT needs_dictionary
29
+ }).to_csv(f"{out}/data.csv", index=False)
30
+ PY
31
+
32
+ python3 "$SCRIPT" "$TMP/data.csv" --out-dir "$TMP/out" >/dev/null 2>&1
33
+
34
+ fail=0; ran=0
35
+ assert() {
36
+ local label="$1" cond="$2"
37
+ ran=$((ran+1))
38
+ if [[ "$cond" == "1" ]]; then printf ' PASS %s\n' "$label"
39
+ else printf ' FAIL %s\n' "$label"; fail=$((fail+1)); fi
40
+ }
41
+
42
+ # Outputs exist.
43
+ assert "codebook.json written" "$([[ -f "$TMP/out/codebook.json" ]] && echo 1 || echo 0)"
44
+ assert "codebook.md written" "$([[ -f "$TMP/out/codebook.md" ]] && echo 1 || echo 0)"
45
+
46
+ # Role inference + needs_dictionary + no-hallucination, asserted from JSON.
47
+ while IFS=$'\t' read -r status label; do
48
+ [[ -z "$label" ]] && continue
49
+ assert "$label" "$([[ "$status" == "PASS" ]] && echo 1 || echo 0)"
50
+ done < <(python3 - "$TMP/out/codebook.json" <<'PY'
51
+ import json, sys
52
+ cb = json.load(open(sys.argv[1]))
53
+ col = {c["name"]: c for c in cb["columns"]}
54
+ checks = {
55
+ "role: patient_id=id": col["patient_id"]["role"] == "id",
56
+ "role: age=continuous": col["age"]["role"] == "continuous",
57
+ "role: bmi=continuous": col["bmi"]["role"] == "continuous",
58
+ "role: sex=binary": col["sex"]["role"] == "binary",
59
+ "role: fatty_liver_grade=categorical": col["fatty_liver_grade"]["role"] == "categorical",
60
+ "role: visit_date=date": col["visit_date"]["role"] == "date",
61
+ "role: smoking_status=categorical": col["smoking_status"]["role"] == "categorical",
62
+ "needs_dict: sex flagged": col["sex"]["needs_dictionary"] is True,
63
+ "needs_dict: fatty_liver_grade flagged": col["fatty_liver_grade"]["needs_dictionary"] is True,
64
+ "needs_dict: smoking_status NOT flagged": col["smoking_status"]["needs_dictionary"] is False,
65
+ "needs_dict: bmi NOT flagged": col["bmi"]["needs_dictionary"] is False,
66
+ "no-hallucination: labels null": all(c["label"] is None for c in cb["columns"]),
67
+ "no-hallucination: units null": all(c["units"] is None for c in cb["columns"]),
68
+ "count: needs_dictionary_count==2": cb["needs_dictionary_count"] == 2,
69
+ }
70
+ for k, v in checks.items():
71
+ print(("PASS" if v else "FAIL") + "\t" + k)
72
+ PY
73
+ )
74
+
75
+ printf '\n%d/%d checks passed\n' "$((ran-fail))" "$ran"
76
+ [[ "$fail" -eq 0 ]] || exit 1