medsci-skills 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (702)
  1. package/LICENSE +50 -0
  2. package/README.md +602 -0
  3. package/README_FIRST.md +27 -0
  4. package/bin/medsci-skills.js +159 -0
  5. package/installers/install-macos.command +19 -0
  6. package/installers/install-windows.cmd +26 -0
  7. package/installers/install-windows.ps1 +17 -0
  8. package/installers/install.py +218 -0
  9. package/metadata/skills_catalog.json +452 -0
  10. package/package.json +48 -0
  11. package/skills/academic-aio/SKILL.md +408 -0
  12. package/skills/academic-aio/references/case_studies/kjr_mllm_2025.md +82 -0
  13. package/skills/academic-aio/references/checklists/AIO_GENERAL.md +354 -0
  14. package/skills/academic-aio/references/journal_summarybox_templates.yaml +126 -0
  15. package/skills/academic-aio/references/oac_funding_checklist.yaml +129 -0
  16. package/skills/academic-aio/references/reporting_guideline_mapping.md +39 -0
  17. package/skills/academic-aio/references/schema_markup_templates/CodeRepository.jsonld +32 -0
  18. package/skills/academic-aio/references/schema_markup_templates/Dataset.jsonld +36 -0
  19. package/skills/academic-aio/references/schema_markup_templates/Person.jsonld +30 -0
  20. package/skills/academic-aio/references/schema_markup_templates/README.md +43 -0
  21. package/skills/academic-aio/references/schema_markup_templates/ScholarlyArticle.jsonld +55 -0
  22. package/skills/academic-aio/scripts/batch_metadata_audit.py +169 -0
  23. package/skills/academic-aio/scripts/validate_schema.py +118 -0
  24. package/skills/academic-aio/skill.yml +36 -0
  25. package/skills/academic-aio/templates/aio_audit_checklist.md.j2 +108 -0
  26. package/skills/add-journal/SKILL.md +482 -0
  27. package/skills/add-journal/skill.yml +33 -0
  28. package/skills/analyze-stats/SKILL.md +598 -0
  29. package/skills/analyze-stats/references/analysis_guides/missing_data.md +109 -0
  30. package/skills/analyze-stats/references/analysis_guides/nhis_icd10_mapping.md +247 -0
  31. package/skills/analyze-stats/references/analysis_guides/propensity_score.md +132 -0
  32. package/skills/analyze-stats/references/analysis_guides/regression.md +115 -0
  33. package/skills/analyze-stats/references/analysis_guides/repeated_measures.md +160 -0
  34. package/skills/analyze-stats/references/analysis_guides/survey_weighted.md +366 -0
  35. package/skills/analyze-stats/references/analysis_guides/test_selection.md +86 -0
  36. package/skills/analyze-stats/references/style/figure_style.mplstyle +69 -0
  37. package/skills/analyze-stats/references/style/theme_publication.R +147 -0
  38. package/skills/analyze-stats/references/table-standards/journal-profiles/ajr.yaml +51 -0
  39. package/skills/analyze-stats/references/table-standards/journal-profiles/european_radiology.yaml +55 -0
  40. package/skills/analyze-stats/references/table-standards/journal-profiles/jama.yaml +66 -0
  41. package/skills/analyze-stats/references/table-standards/journal-profiles/lancet.yaml +57 -0
  42. package/skills/analyze-stats/references/table-standards/journal-profiles/nejm.yaml +51 -0
  43. package/skills/analyze-stats/references/table-standards/journal-profiles/radiology.yaml +66 -0
  44. package/skills/analyze-stats/references/table-standards/table-standards.md +287 -0
  45. package/skills/analyze-stats/references/table-standards/table-types/diagnostic_accuracy.md +36 -0
  46. package/skills/analyze-stats/references/table-standards/table-types/meta_analysis.md +58 -0
  47. package/skills/analyze-stats/references/table-standards/table-types/model_comparison.md +36 -0
  48. package/skills/analyze-stats/references/table-standards/table-types/regression_results.md +50 -0
  49. package/skills/analyze-stats/references/table-standards/table-types/table1_demographics.md +51 -0
  50. package/skills/analyze-stats/references/table-standards/tool-comparison.md +79 -0
  51. package/skills/analyze-stats/references/templates/agreement_analysis.py +436 -0
  52. package/skills/analyze-stats/references/templates/dca_plot.R +237 -0
  53. package/skills/analyze-stats/references/templates/diagnostic_accuracy.py +401 -0
  54. package/skills/analyze-stats/references/templates/dta_meta_analysis.R +384 -0
  55. package/skills/analyze-stats/references/templates/forest_plot.py +412 -0
  56. package/skills/analyze-stats/references/templates/likert_summary.py +356 -0
  57. package/skills/analyze-stats/references/templates/meta_analysis.R +365 -0
  58. package/skills/analyze-stats/references/templates/propensity_score.py +478 -0
  59. package/skills/analyze-stats/references/templates/regression.py +425 -0
  60. package/skills/analyze-stats/references/templates/repeated_measures.py +434 -0
  61. package/skills/analyze-stats/references/templates/sample_size.R +382 -0
  62. package/skills/analyze-stats/references/templates/survey_weighted_analysis.py +411 -0
  63. package/skills/analyze-stats/references/templates/survival_analysis.py +325 -0
  64. package/skills/analyze-stats/references/templates/table1_demographics.py +287 -0
  65. package/skills/analyze-stats/scripts/check_generated_code.py +335 -0
  66. package/skills/analyze-stats/skill.yml +38 -0
  67. package/skills/analyze-stats/tests/fixtures/gen_bad.R +16 -0
  68. package/skills/analyze-stats/tests/fixtures/gen_bad.py +24 -0
  69. package/skills/analyze-stats/tests/fixtures/gen_clean.py +21 -0
  70. package/skills/analyze-stats/tests/test_generated_code.sh +59 -0
  71. package/skills/analyze-stats/tests/test_survival_template.sh +53 -0
  72. package/skills/author-strategy/SKILL.md +117 -0
  73. package/skills/author-strategy/analyze_patterns.py +303 -0
  74. package/skills/author-strategy/fetch_pubmed.py +374 -0
  75. package/skills/author-strategy/skill.yml +34 -0
  76. package/skills/batch-cohort/SKILL.md +223 -0
  77. package/skills/batch-cohort/references/base_template_knhanes.R +210 -0
  78. package/skills/batch-cohort/references/batch_template_generator.R +222 -0
  79. package/skills/batch-cohort/references/variable_coding_registry.md +136 -0
  80. package/skills/batch-cohort/skill.yml +35 -0
  81. package/skills/calc-sample-size/SKILL.md +491 -0
  82. package/skills/calc-sample-size/references/formulas.md +655 -0
  83. package/skills/calc-sample-size/references/observational_cohort.md +49 -0
  84. package/skills/calc-sample-size/skill.yml +51 -0
  85. package/skills/check-reporting/SKILL.md +534 -0
  86. package/skills/check-reporting/references/LICENSES.md +41 -0
  87. package/skills/check-reporting/references/checklists/AMSTAR2.md +54 -0
  88. package/skills/check-reporting/references/checklists/ARRIVE_2.md +234 -0
  89. package/skills/check-reporting/references/checklists/CARE.md +102 -0
  90. package/skills/check-reporting/references/checklists/CLAIM_2024.md +128 -0
  91. package/skills/check-reporting/references/checklists/CLEAR.md +113 -0
  92. package/skills/check-reporting/references/checklists/CONSORT.md +86 -0
  93. package/skills/check-reporting/references/checklists/COSMIN_RoB.md +136 -0
  94. package/skills/check-reporting/references/checklists/GRRAS.md +61 -0
  95. package/skills/check-reporting/references/checklists/MI_CLEAR_LLM.md +167 -0
  96. package/skills/check-reporting/references/checklists/MOOSE.md +85 -0
  97. package/skills/check-reporting/references/checklists/NOS.md +88 -0
  98. package/skills/check-reporting/references/checklists/PRISMA_2020.md +135 -0
  99. package/skills/check-reporting/references/checklists/PRISMA_DTA.md +36 -0
  100. package/skills/check-reporting/references/checklists/PRISMA_P.md +56 -0
  101. package/skills/check-reporting/references/checklists/PROBAST.md +75 -0
  102. package/skills/check-reporting/references/checklists/PROBAST_AI.md +130 -0
  103. package/skills/check-reporting/references/checklists/QUADAS2.md +77 -0
  104. package/skills/check-reporting/references/checklists/QUADAS_C.md +131 -0
  105. package/skills/check-reporting/references/checklists/ROBINS_E.md +179 -0
  106. package/skills/check-reporting/references/checklists/ROBINS_I.md +87 -0
  107. package/skills/check-reporting/references/checklists/ROBIS.md +114 -0
  108. package/skills/check-reporting/references/checklists/ROB_ME.md +126 -0
  109. package/skills/check-reporting/references/checklists/RoB2.md +79 -0
  110. package/skills/check-reporting/references/checklists/RoB_NMA.md +96 -0
  111. package/skills/check-reporting/references/checklists/SPIRIT.md +112 -0
  112. package/skills/check-reporting/references/checklists/SQUIRE_2.md +68 -0
  113. package/skills/check-reporting/references/checklists/STARD.md +129 -0
  114. package/skills/check-reporting/references/checklists/STARD_AI.md +211 -0
  115. package/skills/check-reporting/references/checklists/STROBE.md +80 -0
  116. package/skills/check-reporting/references/checklists/SWiM.md +33 -0
  117. package/skills/check-reporting/references/checklists/TRIPOD.md +157 -0
  118. package/skills/check-reporting/references/checklists/TRIPOD_AI.md +140 -0
  119. package/skills/check-reporting/references/step4c_registration_timing.md +93 -0
  120. package/skills/check-reporting/references/step4d_prisma_figure_audit.md +137 -0
  121. package/skills/check-reporting/scripts/check_checklist_exists.py +183 -0
  122. package/skills/check-reporting/scripts/check_checklist_version.py +168 -0
  123. package/skills/check-reporting/scripts/check_framework_naming.py +206 -0
  124. package/skills/check-reporting/scripts/check_prisma_figure.py +209 -0
  125. package/skills/check-reporting/scripts/prisma_cascade_check.py +274 -0
  126. package/skills/check-reporting/skill.yml +41 -0
  127. package/skills/check-reporting/tests/fixtures/framework_bad.md +8 -0
  128. package/skills/check-reporting/tests/fixtures/framework_clean.md +7 -0
  129. package/skills/check-reporting/tests/test_checklist_fail_fast.sh +77 -0
  130. package/skills/check-reporting/tests/test_checklist_version.sh +72 -0
  131. package/skills/check-reporting/tests/test_framework_naming.sh +45 -0
  132. package/skills/check-reporting/tests/test_prisma_cascade.sh +104 -0
  133. package/skills/clean-data/SKILL.md +180 -0
  134. package/skills/clean-data/references/cleaning_patterns.md +299 -0
  135. package/skills/clean-data/references/profiling_template.py +304 -0
  136. package/skills/clean-data/scripts/check_structural_zero.py +174 -0
  137. package/skills/clean-data/skill.yml +35 -0
  138. package/skills/clean-data/tests/fixtures/smoking.csv +8 -0
  139. package/skills/clean-data/tests/test_structural_zero.sh +49 -0
  140. package/skills/cross-national/SKILL.md +264 -0
  141. package/skills/cross-national/skill.yml +37 -0
  142. package/skills/define-variables/SKILL.md +146 -0
  143. package/skills/define-variables/references/common_definitions.md +190 -0
  144. package/skills/define-variables/skill.yml +34 -0
  145. package/skills/define-variables/templates/variable_operationalization.md +64 -0
  146. package/skills/deidentify/SKILL.md +203 -0
  147. package/skills/deidentify/deidentify.py +1224 -0
  148. package/skills/deidentify/locales/_template.json +45 -0
  149. package/skills/deidentify/locales/au.json +43 -0
  150. package/skills/deidentify/locales/ca.json +44 -0
  151. package/skills/deidentify/locales/cn.json +47 -0
  152. package/skills/deidentify/locales/de.json +48 -0
  153. package/skills/deidentify/locales/fr.json +48 -0
  154. package/skills/deidentify/locales/in.json +48 -0
  155. package/skills/deidentify/locales/jp.json +48 -0
  156. package/skills/deidentify/locales/kr.json +48 -0
  157. package/skills/deidentify/locales/uk.json +45 -0
  158. package/skills/deidentify/locales/us.json +43 -0
  159. package/skills/deidentify/references/date_shift_guide.md +82 -0
  160. package/skills/deidentify/references/hipaa_18_identifiers.md +48 -0
  161. package/skills/deidentify/references/korean_phi_patterns.md +135 -0
  162. package/skills/deidentify/skill.yml +43 -0
  163. package/skills/deidentify/tests/README.md +26 -0
  164. package/skills/deidentify/tests/test_clean.csv +16 -0
  165. package/skills/deidentify/tests/test_edge_cases.csv +11 -0
  166. package/skills/deidentify/tests/test_phi_korean.csv +11 -0
  167. package/skills/design-ai-benchmarking/SKILL.md +214 -0
  168. package/skills/design-ai-benchmarking/references/benchmark_export_schema.json +69 -0
  169. package/skills/design-ai-benchmarking/references/elicitation_rubric_template.md +37 -0
  170. package/skills/design-ai-benchmarking/skill.yml +38 -0
  171. package/skills/design-study/SKILL.md +298 -0
  172. package/skills/design-study/skill.yml +33 -0
  173. package/skills/fill-icmje-coi/SKILL.md +216 -0
  174. package/skills/fill-icmje-coi/scripts/fill_icmje_coi.py +140 -0
  175. package/skills/fill-icmje-coi/skill.yml +35 -0
  176. package/skills/fill-icmje-coi/templates/icmje_coi_seed_synthetic.docx +0 -0
  177. package/skills/fill-protocol/SKILL.md +248 -0
  178. package/skills/fill-protocol/examples/example_irb_template.yaml +53 -0
  179. package/skills/fill-protocol/references/best_practices.md +121 -0
  180. package/skills/fill-protocol/scripts/doc_to_docx.py +111 -0
  181. package/skills/fill-protocol/scripts/fill_form.py +611 -0
  182. package/skills/fill-protocol/scripts/inspect_template.py +61 -0
  183. package/skills/fill-protocol/setup.sh +162 -0
  184. package/skills/fill-protocol/skill.yml +37 -0
  185. package/skills/find-cohort-gap/SKILL.md +309 -0
  186. package/skills/find-cohort-gap/references/cohort_profile_template.md +93 -0
  187. package/skills/find-cohort-gap/references/onepager_template.md +84 -0
  188. package/skills/find-cohort-gap/references/pattern_scoring_rubric.md +169 -0
  189. package/skills/find-cohort-gap/references/saturation_query_templates.md +143 -0
  190. package/skills/find-cohort-gap/skill.yml +35 -0
  191. package/skills/find-journal/POLICY.md +87 -0
  192. package/skills/find-journal/SKILL.md +340 -0
  193. package/skills/find-journal/references/journal_profiles/AJNR.md +29 -0
  194. package/skills/find-journal/references/journal_profiles/AJR.md +30 -0
  195. package/skills/find-journal/references/journal_profiles/Abdominal_Radiology.md +30 -0
  196. package/skills/find-journal/references/journal_profiles/Academic_Radiology.md +30 -0
  197. package/skills/find-journal/references/journal_profiles/Annals_of_Internal_Medicine.md +33 -0
  198. package/skills/find-journal/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +28 -0
  199. package/skills/find-journal/references/journal_profiles/BMC_Medicine.md +31 -0
  200. package/skills/find-journal/references/journal_profiles/British_Journal_of_Radiology.md +39 -0
  201. package/skills/find-journal/references/journal_profiles/CVIR.md +30 -0
  202. package/skills/find-journal/references/journal_profiles/Chest.md +39 -0
  203. package/skills/find-journal/references/journal_profiles/Clinical_Radiology.md +30 -0
  204. package/skills/find-journal/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +32 -0
  205. package/skills/find-journal/references/journal_profiles/Diabetes_Metabolism_Journal.md +36 -0
  206. package/skills/find-journal/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +32 -0
  207. package/skills/find-journal/references/journal_profiles/Endocrinology_and_Metabolism.md +37 -0
  208. package/skills/find-journal/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +39 -0
  209. package/skills/find-journal/references/journal_profiles/European_Radiology.md +29 -0
  210. package/skills/find-journal/references/journal_profiles/Hepatology_Communications.md +40 -0
  211. package/skills/find-journal/references/journal_profiles/Hepatology_International.md +37 -0
  212. package/skills/find-journal/references/journal_profiles/IEEE_JBHI.md +28 -0
  213. package/skills/find-journal/references/journal_profiles/IEEE_TMI.md +28 -0
  214. package/skills/find-journal/references/journal_profiles/INSI.md +29 -0
  215. package/skills/find-journal/references/journal_profiles/Investigative_Radiology.md +25 -0
  216. package/skills/find-journal/references/journal_profiles/JACC_Advances.md +41 -0
  217. package/skills/find-journal/references/journal_profiles/JACC_Asia.md +30 -0
  218. package/skills/find-journal/references/journal_profiles/JACR.md +28 -0
  219. package/skills/find-journal/references/journal_profiles/JAMA.md +40 -0
  220. package/skills/find-journal/references/journal_profiles/JAMA_Network_Open.md +30 -0
  221. package/skills/find-journal/references/journal_profiles/JCSM.md +39 -0
  222. package/skills/find-journal/references/journal_profiles/JKMS.md +32 -0
  223. package/skills/find-journal/references/journal_profiles/JMIR.md +29 -0
  224. package/skills/find-journal/references/journal_profiles/JMIR_Medical_Education.md +29 -0
  225. package/skills/find-journal/references/journal_profiles/JNIS.md +35 -0
  226. package/skills/find-journal/references/journal_profiles/JVIR.md +31 -0
  227. package/skills/find-journal/references/journal_profiles/Journal_of_Biomedical_Informatics.md +29 -0
  228. package/skills/find-journal/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +40 -0
  229. package/skills/find-journal/references/journal_profiles/Journal_of_Magnetic_Resonance_Imaging.md +30 -0
  230. package/skills/find-journal/references/journal_profiles/Journal_of_Nuclear_Medicine.md +31 -0
  231. package/skills/find-journal/references/journal_profiles/Journal_of_Stroke.md +32 -0
  232. package/skills/find-journal/references/journal_profiles/KJR.md +38 -0
  233. package/skills/find-journal/references/journal_profiles/Korean_Circulation_Journal.md +38 -0
  234. package/skills/find-journal/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +36 -0
  235. package/skills/find-journal/references/journal_profiles/Lancet_Diabetes_and_Endocrinology.md +40 -0
  236. package/skills/find-journal/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +49 -0
  237. package/skills/find-journal/references/journal_profiles/Lancet_Infectious_Diseases.md +38 -0
  238. package/skills/find-journal/references/journal_profiles/Lancet_Neurology.md +39 -0
  239. package/skills/find-journal/references/journal_profiles/Lancet_Oncology.md +40 -0
  240. package/skills/find-journal/references/journal_profiles/Lancet_Psychiatry.md +38 -0
  241. package/skills/find-journal/references/journal_profiles/Lancet_Public_Health.md +30 -0
  242. package/skills/find-journal/references/journal_profiles/Lancet_Respiratory_Medicine.md +39 -0
  243. package/skills/find-journal/references/journal_profiles/Liver_International.md +33 -0
  244. package/skills/find-journal/references/journal_profiles/Medical_Image_Analysis.md +28 -0
  245. package/skills/find-journal/references/journal_profiles/NEJM.md +33 -0
  246. package/skills/find-journal/references/journal_profiles/Nature_Machine_Intelligence.md +31 -0
  247. package/skills/find-journal/references/journal_profiles/Nature_Medicine.md +39 -0
  248. package/skills/find-journal/references/journal_profiles/Neuroradiology.md +31 -0
  249. package/skills/find-journal/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +39 -0
  250. package/skills/find-journal/references/journal_profiles/PLOS_Medicine.md +32 -0
  251. package/skills/find-journal/references/journal_profiles/RYAI.md +28 -0
  252. package/skills/find-journal/references/journal_profiles/Radiology.md +29 -0
  253. package/skills/find-journal/references/journal_profiles/Skeletal_Radiology.md +31 -0
  254. package/skills/find-journal/references/journal_profiles/Stroke.md +37 -0
  255. package/skills/find-journal/references/journal_profiles/The_BMJ.md +31 -0
  256. package/skills/find-journal/references/journal_profiles/The_Lancet.md +31 -0
  257. package/skills/find-journal/references/journal_profiles/The_Lancet_Digital_Health.md +29 -0
  258. package/skills/find-journal/references/journal_profiles/World_Journal_of_Hepatology.md +53 -0
  259. package/skills/find-journal/references/journal_profiles/npj_Digital_Medicine.md +29 -0
  260. package/skills/find-journal/skill.yml +34 -0
  261. package/skills/fulltext-retrieval/SKILL.md +174 -0
  262. package/skills/fulltext-retrieval/fetch_oa.py +433 -0
  263. package/skills/fulltext-retrieval/pdf_to_md.py +160 -0
  264. package/skills/fulltext-retrieval/skill.yml +41 -0
  265. package/skills/generate-codebook/SKILL.md +155 -0
  266. package/skills/generate-codebook/references/codebook_schema.md +76 -0
  267. package/skills/generate-codebook/scripts/generate_codebook.py +278 -0
  268. package/skills/generate-codebook/skill.yml +35 -0
  269. package/skills/generate-codebook/tests/test_generate_codebook.sh +76 -0
  270. package/skills/grant-builder/SKILL.md +251 -0
  271. package/skills/grant-builder/skill.yml +34 -0
  272. package/skills/humanize/SKILL.md +251 -0
  273. package/skills/humanize/references/ai_patterns.md +571 -0
  274. package/skills/humanize/skill.yml +33 -0
  275. package/skills/intake-project/SKILL.md +264 -0
  276. package/skills/intake-project/skill.yml +34 -0
  277. package/skills/lit-sync/SKILL.md +448 -0
  278. package/skills/lit-sync/references/locale/ko/note_templates.md +110 -0
  279. package/skills/lit-sync/skill.yml +52 -0
  280. package/skills/lit-sync/tests/test_poll_logic.sh +92 -0
  281. package/skills/ma-scout/SKILL.md +640 -0
  282. package/skills/ma-scout/references/project_readme_template.md +95 -0
  283. package/skills/ma-scout/references/project_readme_template_ko.md +82 -0
  284. package/skills/ma-scout/skill.yml +33 -0
  285. package/skills/make-figures/SKILL.md +957 -0
  286. package/skills/make-figures/references/critic_rubrics/data_plot.md +166 -0
  287. package/skills/make-figures/references/critic_rubrics/flow_diagram.md +169 -0
  288. package/skills/make-figures/references/design_principles.md +181 -0
  289. package/skills/make-figures/references/exemplar_diagrams/README.md +65 -0
  290. package/skills/make-figures/references/exemplar_diagrams/consort/README.md +15 -0
  291. package/skills/make-figures/references/exemplar_diagrams/consort/template_input.yaml +37 -0
  292. package/skills/make-figures/references/exemplar_diagrams/consort/template_output.pdf +0 -0
  293. package/skills/make-figures/references/exemplar_diagrams/consort/template_output.png +0 -0
  294. package/skills/make-figures/references/exemplar_diagrams/consort/template_output_600.png +0 -0
  295. package/skills/make-figures/references/exemplar_diagrams/other/other_02.meta.yaml +4 -0
  296. package/skills/make-figures/references/exemplar_diagrams/other/other_02.png +0 -0
  297. package/skills/make-figures/references/exemplar_diagrams/other/other_02_why.md +13 -0
  298. package/skills/make-figures/references/exemplar_diagrams/pipeline/README.md +15 -0
  299. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.meta.yaml +4 -0
  300. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.png +0 -0
  301. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01_why.md +13 -0
  302. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.meta.yaml +4 -0
  303. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.png +0 -0
  304. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03_why.md +13 -0
  305. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.meta.yaml +4 -0
  306. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.png +0 -0
  307. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04_why.md +13 -0
  308. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.meta.yaml +4 -0
  309. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.png +0 -0
  310. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05_why.md +13 -0
  311. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.meta.yaml +4 -0
  312. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.png +0 -0
  313. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06_why.md +13 -0
  314. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.meta.yaml +4 -0
  315. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.png +0 -0
  316. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07_why.md +13 -0
  317. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.meta.yaml +4 -0
  318. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.png +0 -0
  319. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08_why.md +13 -0
  320. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.meta.yaml +4 -0
  321. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.png +0 -0
  322. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09_why.md +13 -0
  323. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.meta.yaml +4 -0
  324. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.png +0 -0
  325. package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10_why.md +13 -0
  326. package/skills/make-figures/references/exemplar_diagrams/prisma/README.md +15 -0
  327. package/skills/make-figures/references/exemplar_diagrams/prisma/template_input.yaml +47 -0
  328. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.pdf +0 -0
  329. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.png +0 -0
  330. package/skills/make-figures/references/exemplar_diagrams/prisma/template_output_600.png +0 -0
  331. package/skills/make-figures/references/exemplar_diagrams/stard/README.md +15 -0
  332. package/skills/make-figures/references/exemplar_diagrams/stard/template_input.yaml +40 -0
  333. package/skills/make-figures/references/exemplar_diagrams/stard/template_output.pdf +0 -0
  334. package/skills/make-figures/references/exemplar_diagrams/stard/template_output.png +0 -0
  335. package/skills/make-figures/references/exemplar_diagrams/stard/template_output_600.png +0 -0
  336. package/skills/make-figures/references/exemplar_diagrams/strobe/template_input.yaml +43 -0
  337. package/skills/make-figures/references/exemplar_diagrams/strobe/template_input_pptx.yaml +43 -0
  338. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pdf +0 -0
  339. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.png +0 -0
  340. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pptx +0 -0
  341. package/skills/make-figures/references/exemplar_diagrams/strobe/template_output_600.png +0 -0
  342. package/skills/make-figures/references/figure_specs.md +291 -0
  343. package/skills/make-figures/references/flow_diagram_lessons.md +164 -0
  344. package/skills/make-figures/references/jacc_central_illustration_principles.md +91 -0
  345. package/skills/make-figures/references/medical_illustration_sources.md +98 -0
  346. package/skills/make-figures/references/pipeline_concepts_medical_ai.md +240 -0
  347. package/skills/make-figures/references/reporting_guideline_figure_map.md +104 -0
  348. package/skills/make-figures/references/visual_abstract_templates/european_radiology.pptx +0 -0
  349. package/skills/make-figures/references/visual_abstract_templates/jacc_central_illustration.pptx +0 -0
  350. package/skills/make-figures/references/visual_abstract_templates/medsci_default.pptx +0 -0
  351. package/skills/make-figures/references/visual_abstract_templates/template_guide.md +114 -0
  352. package/skills/make-figures/scripts/build_jacc_template.py +77 -0
  353. package/skills/make-figures/scripts/build_prisma2020_template.py +371 -0
  354. package/skills/make-figures/scripts/build_strobe_template.py +351 -0
  355. package/skills/make-figures/scripts/critic_figure.py +264 -0
  356. package/skills/make-figures/scripts/derive_figure_legend_counts.py +138 -0
  357. package/skills/make-figures/scripts/extract_exemplar_from_pdf.py +186 -0
  358. package/skills/make-figures/scripts/fetch_official_templates.sh +88 -0
  359. package/skills/make-figures/scripts/fill_prisma_template.py +142 -0
  360. package/skills/make-figures/scripts/generate_flow_diagram.R +133 -0
  361. package/skills/make-figures/scripts/generate_image.py +99 -0
  362. package/skills/make-figures/scripts/generate_visual_abstract.py +438 -0
  363. package/skills/make-figures/scripts/validate_pptx_mac_compat.py +233 -0
  364. package/skills/make-figures/skill.yml +52 -0
  365. package/skills/make-figures/templates/official/NOTES.md +62 -0
  366. package/skills/make-figures/templates/official/consort2010/CONSORT_2025_editable_checklist.docx +0 -0
  367. package/skills/make-figures/templates/official/consort2010/CONSORT_2025_flow_diagram.docx +0 -0
  368. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v1.pptx +0 -0
  369. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v2.pptx +0 -0
  370. package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_updated_v2.pptx +0 -0
  371. package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_editable_checklist.docx +0 -0
  372. package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_participant_timeline.docx +0 -0
  373. package/skills/make-figures/templates/official/stard2015/STARD_2015_checklist.docx +0 -0
  374. package/skills/make-figures/templates/official/stard2015/STARD_2015_flow_diagram.pdf +0 -0
  375. package/skills/make-figures/tests/fixtures/figure1_flow.yaml +8 -0
  376. package/skills/make-figures/tests/fixtures/manuscript_ok.md +9 -0
  377. package/skills/make-figures/tests/fixtures/manuscript_stale.md +4 -0
  378. package/skills/make-figures/tests/test_legend_reconcile.sh +36 -0
  379. package/skills/manage-project/SKILL.md +358 -0
  380. package/skills/manage-project/references/pre_submission_checklist.md +53 -0
  381. package/skills/manage-project/references/project_state_template.json +37 -0
  382. package/skills/manage-project/references/scaffold_templates.md +118 -0
  383. package/skills/manage-project/references/status_output_format.md +44 -0
  384. package/skills/manage-project/references/timeline_example.md +20 -0
  385. package/skills/manage-project/skill.yml +36 -0
  386. package/skills/manage-project/templates/SSOT.yaml.template +41 -0
  387. package/skills/manage-refs/LICENSE.zotero-mcp +21 -0
  388. package/skills/manage-refs/NOTICE.md +29 -0
  389. package/skills/manage-refs/SKILL.md +289 -0
  390. package/skills/manage-refs/citation_styles/README.md +40 -0
  391. package/skills/manage-refs/citation_styles/american-journal-of-roentgenology.csl +211 -0
  392. package/skills/manage-refs/citation_styles/cardiovascular-and-interventional-radiology.csl +19 -0
  393. package/skills/manage-refs/citation_styles/european-radiology.csl +19 -0
  394. package/skills/manage-refs/citation_styles/journal-of-cachexia-sarcopenia-and-muscle.csl +150 -0
  395. package/skills/manage-refs/citation_styles/journal-of-korean-medical-science-strict.csl +533 -0
  396. package/skills/manage-refs/citation_styles/journal-of-korean-medical-science.csl +16 -0
  397. package/skills/manage-refs/citation_styles/korean-journal-of-radiology.csl +155 -0
  398. package/skills/manage-refs/citation_styles/nature.csl +189 -0
  399. package/skills/manage-refs/citation_styles/nlm-citation-sequence.csl +535 -0
  400. package/skills/manage-refs/citation_styles/radiology.csl +228 -0
  401. package/skills/manage-refs/citation_styles/springer-basic-brackets.csl +187 -0
  402. package/skills/manage-refs/citation_styles/springer-vancouver-brackets.csl +276 -0
  403. package/skills/manage-refs/citation_styles/vancouver-superscript.csl +536 -0
  404. package/skills/manage-refs/citation_styles/vancouver.csl +535 -0
  405. package/skills/manage-refs/references/REFERENCE_STYLE_SPECS.md +59 -0
  406. package/skills/manage-refs/references/check_xref_symptoms.md +35 -0
  407. package/skills/manage-refs/scripts/_vendor_citation_writer.py +600 -0
  408. package/skills/manage-refs/scripts/check_citation_keys.py +112 -0
  409. package/skills/manage-refs/scripts/check_csl_render.py +102 -0
  410. package/skills/manage-refs/scripts/check_xref.py +633 -0
  411. package/skills/manage-refs/scripts/fill_journal_abbrev.py +104 -0
  412. package/skills/manage-refs/scripts/inject_zotero_cwyw.py +133 -0
  413. package/skills/manage-refs/scripts/md_marker_convert.py +193 -0
  414. package/skills/manage-refs/scripts/pre_submission_gate.sh +238 -0
  415. package/skills/manage-refs/scripts/render_pandoc.sh +88 -0
  416. package/skills/manage-refs/skill.yml +70 -0
  417. package/skills/manage-refs/tests/fixtures/pre_submission_gate/README.md +32 -0
  418. package/skills/manage-refs/tests/fixtures/pre_submission_gate/manuscript.md +10 -0
  419. package/skills/manage-refs/tests/fixtures/pre_submission_gate/refs.bib +34 -0
  420. package/skills/manage-refs/tests/fixtures/pre_submission_gate/run.sh +117 -0
  421. package/skills/manage-refs/tests/test_vN_docx_check.sh +145 -0
  422. package/skills/meta-analysis/SKILL.md +739 -0
  423. package/skills/meta-analysis/references/LICENSES.md +21 -0
  424. package/skills/meta-analysis/references/PROSPERO_template.md +221 -0
  425. package/skills/meta-analysis/references/ai_pre_screening_template.py +245 -0
  426. package/skills/meta-analysis/references/checklists/JBI_Case_Series.md +45 -0
  427. package/skills/meta-analysis/references/checklists/NOS.md +88 -0
  428. package/skills/meta-analysis/references/checklists/PRISMA_DTA.md +36 -0
  429. package/skills/meta-analysis/references/checklists/PROBAST.md +75 -0
  430. package/skills/meta-analysis/references/checklists/QUADAS2.md +77 -0
  431. package/skills/meta-analysis/references/checklists/ROBINS_I.md +87 -0
  432. package/skills/meta-analysis/references/checklists/RoB2.md +79 -0
  433. package/skills/meta-analysis/references/data_integrity_checklist.md +57 -0
  434. package/skills/meta-analysis/references/icmje_coi_guide.md +181 -0
  435. package/skills/meta-analysis/references/phase10_recovery.md +136 -0
  436. package/skills/meta-analysis/references/phase4_km_composite.md +58 -0
  437. package/skills/meta-analysis/references/phase6_statistical_synthesis.md +148 -0
  438. package/skills/meta-analysis/references/phase9_circulation.md +84 -0
  439. package/skills/meta-analysis/references/post_submission_release_ops.md +41 -0
  440. package/skills/meta-analysis/references/r_templates.md +132 -0
  441. package/skills/meta-analysis/references/review_orchestration.md +40 -0
  442. package/skills/meta-analysis/references/submission_package_drift.md +71 -0
  443. package/skills/meta-analysis/scripts/check_pool_consistency.py +201 -0
  444. package/skills/meta-analysis/scripts/cohort_overlap_check.py +242 -0
  445. package/skills/meta-analysis/scripts/dta_extraction_qc.py +137 -0
  446. package/skills/meta-analysis/scripts/screening_reconcile.py +160 -0
  447. package/skills/meta-analysis/skill.yml +47 -0
  448. package/skills/meta-analysis/templates/FINAL_POOL_LOCK.yaml.template +70 -0
  449. package/skills/meta-analysis/templates/extraction_form_v2.md +129 -0
  450. package/skills/meta-analysis/templates/supplementary_8file_checklist.md +94 -0
  451. package/skills/meta-analysis/tests/test_pool_consistency.sh +123 -0
  452. package/skills/orchestrate/SKILL.md +501 -0
  453. package/skills/orchestrate/references/dialogue_nodes.md +196 -0
  454. package/skills/orchestrate/references/report_template.md +109 -0
  455. package/skills/orchestrate/references/report_template_ko.md +88 -0
  456. package/skills/orchestrate/skill.yml +44 -0
  457. package/skills/peer-review/SKILL.md +381 -0
  458. package/skills/peer-review/references/aczel_2021_reviewer2_patterns.md +88 -0
  459. package/skills/peer-review/references/domain-probes/ai_overclaiming.md +47 -0
  460. package/skills/peer-review/references/domain-probes/narrative_review.md +44 -0
  461. package/skills/peer-review/references/domain-probes/observational_confounding.md +48 -0
  462. package/skills/peer-review/references/domain-probes/radiomics.md +38 -0
  463. package/skills/peer-review/references/domain-probes/sr_ma.md +87 -0
  464. package/skills/peer-review/references/domain-probes/survival_prognostic.md +68 -0
  465. package/skills/peer-review/references/exemplar_reviews/README.md +43 -0
  466. package/skills/peer-review/references/exemplar_reviews/ai_overclaiming.md +47 -0
  467. package/skills/peer-review/references/exemplar_reviews/calibration_missing.md +44 -0
  468. package/skills/peer-review/references/exemplar_reviews/data_leakage.md +48 -0
  469. package/skills/peer-review/references/exemplar_reviews/reference_standard_validity.md +45 -0
  470. package/skills/peer-review/references/narrative_review_audit.md +67 -0
  471. package/skills/peer-review/references/reviewer_calibration/README.md +34 -0
  472. package/skills/peer-review/references/reviewer_calibration/compliance_floor.md +52 -0
  473. package/skills/peer-review/references/reviewer_profiles/AJR.md +82 -0
  474. package/skills/peer-review/references/reviewer_profiles/EURE.md +64 -0
  475. package/skills/peer-review/references/reviewer_profiles/INSI.md +57 -0
  476. package/skills/peer-review/references/reviewer_profiles/KJR.md +100 -0
  477. package/skills/peer-review/references/reviewer_profiles/README.md +32 -0
  478. package/skills/peer-review/references/reviewer_profiles/RYAI.md +86 -0
  479. package/skills/peer-review/skill.yml +39 -0
  480. package/skills/present-paper/SKILL.md +675 -0
  481. package/skills/present-paper/references/critic_rubrics/slide.md +155 -0
  482. package/skills/present-paper/references/generate_pptx_templates.py +604 -0
  483. package/skills/present-paper/references/medical_presentation_templates.md +277 -0
  484. package/skills/present-paper/references/slide_design_principles.md +202 -0
  485. package/skills/present-paper/references/slide_visual_styles/nature_lancet.md +168 -0
  486. package/skills/present-paper/references/workflow-checklist.md +109 -0
  487. package/skills/present-paper/scripts/extract_pdf_figures.py +243 -0
  488. package/skills/present-paper/scripts/inject_pronunciation_notes.py +178 -0
  489. package/skills/present-paper/scripts/inject_speaker_notes.py +133 -0
  490. package/skills/present-paper/scripts/strip_notes_for_sharing.py +140 -0
  491. package/skills/present-paper/scripts/trim_caption.py +271 -0
  492. package/skills/present-paper/skill.yml +41 -0
  493. package/skills/present-paper/templates/build_pptx_nature_lancet.py +688 -0
  494. package/skills/publish-skill/SKILL.md +370 -0
  495. package/skills/publish-skill/references/license-compatibility-matrix.md +132 -0
  496. package/skills/publish-skill/references/pii-patterns.md +130 -0
  497. package/skills/publish-skill/scripts/audit_skill.sh +278 -0
  498. package/skills/publish-skill/skill.yml +35 -0
  499. package/skills/render-pdf-doc/SKILL.md +146 -0
  500. package/skills/render-pdf-doc/references/known_pitfalls.md +53 -0
  501. package/skills/render-pdf-doc/references/pandoc_korean_cheatsheet.md +77 -0
  502. package/skills/render-pdf-doc/scripts/check_deps.sh +42 -0
  503. package/skills/render-pdf-doc/scripts/infer_colwidths.py +164 -0
  504. package/skills/render-pdf-doc/scripts/render_pdf.sh +98 -0
  505. package/skills/render-pdf-doc/skill.yml +57 -0
  506. package/skills/render-pdf-doc/templates/anchor-doc.md +27 -0
  507. package/skills/render-pdf-doc/templates/anchor-doc_ko.md +25 -0
  508. package/skills/render-pdf-doc/templates/briefing-handout.md +33 -0
  509. package/skills/render-pdf-doc/templates/briefing-handout_ko.md +31 -0
  510. package/skills/render-pdf-doc/templates/proposal-cover.md +33 -0
  511. package/skills/render-pdf-doc/templates/proposal-cover_ko.md +31 -0
  512. package/skills/render-pdf-doc/templates/reference-table.md +22 -0
  513. package/skills/render-pdf-doc/templates/reference-table_ko.md +20 -0
  514. package/skills/replicate-study/SKILL.md +150 -0
  515. package/skills/replicate-study/references/harmonization_3country.csv +47 -0
  516. package/skills/replicate-study/references/harmonization_knhanes_nhanes.csv +68 -0
  517. package/skills/replicate-study/references/methodology_extraction_template.md +134 -0
  518. package/skills/replicate-study/skill.yml +37 -0
  519. package/skills/review-paper/SKILL.md +104 -0
  520. package/skills/review-paper/references/macro_skeleton.md +6 -0
  521. package/skills/review-paper/skill.yml +25 -0
  522. package/skills/revise/SKILL.md +515 -0
  523. package/skills/revise/references/r2r_voice.md +346 -0
  524. package/skills/revise/skill.yml +43 -0
  525. package/skills/search-lit/SKILL.md +443 -0
  526. package/skills/search-lit/references/parse_pubmed.py +326 -0
  527. package/skills/search-lit/references/pubmed_eutils.sh +111 -0
  528. package/skills/search-lit/skill.yml +46 -0
  529. package/skills/self-review/SKILL.md +1045 -0
  530. package/skills/self-review/references/domain-probes/ai_overclaiming.md +47 -0
  531. package/skills/self-review/references/domain-probes/narrative_review.md +44 -0
  532. package/skills/self-review/references/domain-probes/observational_confounding.md +48 -0
  533. package/skills/self-review/references/domain-probes/radiomics.md +38 -0
  534. package/skills/self-review/references/domain-probes/sr_ma.md +87 -0
  535. package/skills/self-review/references/domain-probes/survival_prognostic.md +68 -0
  536. package/skills/self-review/references/exemplar_findings/README.md +43 -0
  537. package/skills/self-review/references/exemplar_findings/cohort_arithmetic_mismatch.md +35 -0
  538. package/skills/self-review/references/exemplar_findings/estimand_drift_posthoc_primary.md +39 -0
  539. package/skills/self-review/references/exemplar_findings/scope_overreach_cross_sectional.md +35 -0
  540. package/skills/self-review/references/exemplar_findings/unadjusted_confounder.md +36 -0
  541. package/skills/self-review/references/panel_review_template.md +177 -0
  542. package/skills/self-review/scripts/check_artifact_coverage.py +301 -0
  543. package/skills/self-review/scripts/check_claim_artifact.py +248 -0
  544. package/skills/self-review/scripts/check_classical_style.py +185 -0
  545. package/skills/self-review/scripts/check_cohort_arithmetic.py +481 -0
  546. package/skills/self-review/scripts/check_confounding_completeness.py +287 -0
  547. package/skills/self-review/scripts/check_panel_diversity.py +336 -0
  548. package/skills/self-review/scripts/check_reference_adequacy.py +392 -0
  549. package/skills/self-review/scripts/check_reviewer_team_consistency.py +412 -0
  550. package/skills/self-review/scripts/check_scope_coherence.py +177 -0
  551. package/skills/self-review/skill.yml +47 -0
  552. package/skills/self-review/tests/fixtures/claim_manuscript.md +17 -0
  553. package/skills/self-review/tests/fixtures/claim_prereg.md +6 -0
  554. package/skills/self-review/tests/fixtures/cohort_bad.md +21 -0
  555. package/skills/self-review/tests/fixtures/cohort_clean.md +21 -0
  556. package/skills/self-review/tests/fixtures/cohort_partition.csv +5 -0
  557. package/skills/self-review/tests/fixtures/coverage_analysis/31_delong_nested_added_value.csv +3 -0
  558. package/skills/self-review/tests/fixtures/coverage_analysis/table1_demographics.csv +3 -0
  559. package/skills/self-review/tests/fixtures/coverage_clean.md +13 -0
  560. package/skills/self-review/tests/fixtures/coverage_manuscript.md +11 -0
  561. package/skills/self-review/tests/fixtures/panel_collapse.json +27 -0
  562. package/skills/self-review/tests/fixtures/panel_good.json +32 -0
  563. package/skills/self-review/tests/fixtures/panel_monoculture.json +32 -0
  564. package/skills/self-review/tests/fixtures/refadeq_letter.md +13 -0
  565. package/skills/self-review/tests/fixtures/refadeq_original_fixed.md +42 -0
  566. package/skills/self-review/tests/fixtures/refadeq_original_uncited.md +40 -0
  567. package/skills/self-review/tests/fixtures/scope_bad.md +9 -0
  568. package/skills/self-review/tests/fixtures/scope_clean.md +8 -0
  569. package/skills/self-review/tests/fixtures/scope_surrogate.md +8 -0
  570. package/skills/self-review/tests/fixtures/style_bad.md +13 -0
  571. package/skills/self-review/tests/fixtures/style_clean.md +11 -0
  572. package/skills/self-review/tests/fixtures/table1_by_exposure.csv +11 -0
  573. package/skills/self-review/tests/test_artifact_coverage.sh +44 -0
  574. package/skills/self-review/tests/test_claim_artifact.sh +50 -0
  575. package/skills/self-review/tests/test_classical_style.sh +44 -0
  576. package/skills/self-review/tests/test_cohort_arithmetic.sh +49 -0
  577. package/skills/self-review/tests/test_confounding_completeness.sh +66 -0
  578. package/skills/self-review/tests/test_panel_diversity.sh +55 -0
  579. package/skills/self-review/tests/test_panel_mode.sh +69 -0
  580. package/skills/self-review/tests/test_reference_adequacy.sh +68 -0
  581. package/skills/self-review/tests/test_reviewer_team_consistency.sh +138 -0
  582. package/skills/self-review/tests/test_scope_coherence.sh +46 -0
  583. package/skills/setup-medsci/SKILL.md +110 -0
  584. package/skills/setup-medsci/references/setup-checklist.md +51 -0
  585. package/skills/setup-medsci/skill.yml +30 -0
  586. package/skills/sync-submission/SKILL.md +382 -0
  587. package/skills/sync-submission/scripts/author_registry_example.yaml +36 -0
  588. package/skills/sync-submission/scripts/blind_sweep.py +203 -0
  589. package/skills/sync-submission/scripts/check_asset_anonymization.py +300 -0
  590. package/skills/sync-submission/scripts/check_cross_artifact_stale.py +211 -0
  591. package/skills/sync-submission/scripts/cover_letter_drift_check.py +451 -0
  592. package/skills/sync-submission/scripts/cross_document_n_check.py +486 -0
  593. package/skills/sync-submission/scripts/detect_copy_divergence.py +136 -0
  594. package/skills/sync-submission/scripts/preflight_gate.py +458 -0
  595. package/skills/sync-submission/scripts/scope_drift_check.py +362 -0
  596. package/skills/sync-submission/scripts/sync_submission.py +169 -0
  597. package/skills/sync-submission/skill.yml +43 -0
  598. package/skills/sync-submission/tests/fixtures/copy_ok.md +5 -0
  599. package/skills/sync-submission/tests/fixtures/copy_stale.md +5 -0
  600. package/skills/sync-submission/tests/fixtures/ssot.md +5 -0
  601. package/skills/sync-submission/tests/test_asset_anonymization.sh +99 -0
  602. package/skills/sync-submission/tests/test_copy_divergence.sh +44 -0
  603. package/skills/sync-submission/tests/test_cross_artifact_stale.sh +80 -0
  604. package/skills/sync-submission/tests/test_cross_document_n.sh +132 -0
  605. package/skills/sync-submission/tests/test_preflight_gate.sh +112 -0
  606. package/skills/sync-submission/tests/test_scope_drift.sh +122 -0
  607. package/skills/sync-submission/tests/test_vN_docx_assertion.sh +51 -0
  608. package/skills/verify-refs/SKILL.md +177 -0
  609. package/skills/verify-refs/references/manual_checkpoint_guide.md +100 -0
  610. package/skills/verify-refs/scripts/verify_cli.sh +62 -0
  611. package/skills/verify-refs/scripts/verify_refs.py +782 -0
  612. package/skills/verify-refs/skill.yml +44 -0
  613. package/skills/verify-refs/tests/fixtures/pagination_placeholder.bib +17 -0
  614. package/skills/verify-refs/tests/test_pagination_placeholder.sh +42 -0
  615. package/skills/version-dataset/SKILL.md +143 -0
  616. package/skills/version-dataset/references/manifest_schema.md +72 -0
  617. package/skills/version-dataset/scripts/version_dataset.py +242 -0
  618. package/skills/version-dataset/skill.yml +35 -0
  619. package/skills/version-dataset/tests/test_version_dataset.sh +52 -0
  620. package/skills/write-paper/SKILL.md +1148 -0
  621. package/skills/write-paper/references/exemplar_methods/README.md +38 -0
  622. package/skills/write-paper/references/exemplar_methods/ai_validation_tripod_claim.md +47 -0
  623. package/skills/write-paper/references/exemplar_methods/diagnostic_accuracy_stard.md +50 -0
  624. package/skills/write-paper/references/exemplar_methods/observational_cohort_strobe.md +43 -0
  625. package/skills/write-paper/references/journal_profiles/AJNR.md +185 -0
  626. package/skills/write-paper/references/journal_profiles/AJR.md +149 -0
  627. package/skills/write-paper/references/journal_profiles/Abdominal_Radiology.md +139 -0
  628. package/skills/write-paper/references/journal_profiles/Academic_Radiology.md +90 -0
  629. package/skills/write-paper/references/journal_profiles/Annals_of_Internal_Medicine.md +150 -0
  630. package/skills/write-paper/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +82 -0
  631. package/skills/write-paper/references/journal_profiles/British_Journal_of_Radiology.md +161 -0
  632. package/skills/write-paper/references/journal_profiles/CVIR.md +157 -0
  633. package/skills/write-paper/references/journal_profiles/Chest.md +270 -0
  634. package/skills/write-paper/references/journal_profiles/Clinical_Radiology.md +160 -0
  635. package/skills/write-paper/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +147 -0
  636. package/skills/write-paper/references/journal_profiles/Diabetes_Metabolism_Journal.md +163 -0
  637. package/skills/write-paper/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +216 -0
  638. package/skills/write-paper/references/journal_profiles/Endocrinology_and_Metabolism.md +167 -0
  639. package/skills/write-paper/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +192 -0
  640. package/skills/write-paper/references/journal_profiles/European_Radiology.md +159 -0
  641. package/skills/write-paper/references/journal_profiles/Hepatology_Communications.md +110 -0
  642. package/skills/write-paper/references/journal_profiles/Hepatology_International.md +106 -0
  643. package/skills/write-paper/references/journal_profiles/IEEE_TMI.md +180 -0
  644. package/skills/write-paper/references/journal_profiles/INSI.md +163 -0
  645. package/skills/write-paper/references/journal_profiles/Investigative_Radiology.md +86 -0
  646. package/skills/write-paper/references/journal_profiles/JACC_Advances.md +197 -0
  647. package/skills/write-paper/references/journal_profiles/JACC_Asia.md +168 -0
  648. package/skills/write-paper/references/journal_profiles/JACR.md +87 -0
  649. package/skills/write-paper/references/journal_profiles/JAMA.md +188 -0
  650. package/skills/write-paper/references/journal_profiles/JAMA_Network_Open.md +170 -0
  651. package/skills/write-paper/references/journal_profiles/JCSM.md +266 -0
  652. package/skills/write-paper/references/journal_profiles/JKMS.md +201 -0
  653. package/skills/write-paper/references/journal_profiles/JMIR.md +88 -0
  654. package/skills/write-paper/references/journal_profiles/JMIR_Medical_Education.md +86 -0
  655. package/skills/write-paper/references/journal_profiles/JNIS.md +227 -0
  656. package/skills/write-paper/references/journal_profiles/JVIR.md +158 -0
  657. package/skills/write-paper/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +191 -0
  658. package/skills/write-paper/references/journal_profiles/Journal_of_Stroke.md +176 -0
  659. package/skills/write-paper/references/journal_profiles/KJR.md +185 -0
  660. package/skills/write-paper/references/journal_profiles/Korean_Circulation_Journal.md +184 -0
  661. package/skills/write-paper/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +178 -0
  662. package/skills/write-paper/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +127 -0
  663. package/skills/write-paper/references/journal_profiles/Liver_International.md +165 -0
  664. package/skills/write-paper/references/journal_profiles/Medical_Image_Analysis.md +147 -0
  665. package/skills/write-paper/references/journal_profiles/NEJM.md +147 -0
  666. package/skills/write-paper/references/journal_profiles/Nature_Medicine.md +181 -0
  667. package/skills/write-paper/references/journal_profiles/Neuroradiology.md +151 -0
  668. package/skills/write-paper/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +184 -0
  669. package/skills/write-paper/references/journal_profiles/PLOS_Medicine.md +166 -0
  670. package/skills/write-paper/references/journal_profiles/RYAI.md +124 -0
  671. package/skills/write-paper/references/journal_profiles/Radiology.md +173 -0
  672. package/skills/write-paper/references/journal_profiles/Skeletal_Radiology.md +135 -0
  673. package/skills/write-paper/references/journal_profiles/Stroke.md +210 -0
  674. package/skills/write-paper/references/journal_profiles/The_BMJ.md +121 -0
  675. package/skills/write-paper/references/journal_profiles/The_Lancet.md +112 -0
  676. package/skills/write-paper/references/journal_profiles/The_Lancet_Digital_Health.md +104 -0
  677. package/skills/write-paper/references/journal_profiles/World_Journal_of_Hepatology.md +106 -0
  678. package/skills/write-paper/references/journal_profiles/npj_Digital_Medicine.md +93 -0
  679. package/skills/write-paper/references/paper_types/ai_validation.md +270 -0
  680. package/skills/write-paper/references/paper_types/animal_study.md +194 -0
  681. package/skills/write-paper/references/paper_types/case_report.md +237 -0
  682. package/skills/write-paper/references/paper_types/cross_national.md +328 -0
  683. package/skills/write-paper/references/paper_types/letter.md +127 -0
  684. package/skills/write-paper/references/paper_types/meta_analysis.md +181 -0
  685. package/skills/write-paper/references/paper_types/nhis_cohort.md +297 -0
  686. package/skills/write-paper/references/paper_types/original_article.md +221 -0
  687. package/skills/write-paper/references/paper_types/technical_note.md +131 -0
  688. package/skills/write-paper/references/section_guides/discussion.md +155 -0
  689. package/skills/write-paper/references/section_guides/introduction.md +108 -0
  690. package/skills/write-paper/references/section_guides/methods.md +144 -0
  691. package/skills/write-paper/references/section_guides/results.md +113 -0
  692. package/skills/write-paper/references/section_guides/step7_1_classical_qc.md +67 -0
  693. package/skills/write-paper/references/section_guides/step7_4a_audit_recovery.md +74 -0
  694. package/skills/write-paper/references/section_guides/title_abstract.md +123 -0
  695. package/skills/write-paper/references/section_templates/methods_statistical.md +147 -0
  696. package/skills/write-paper/scripts/check_placeholders.py +182 -0
  697. package/skills/write-paper/skill.yml +48 -0
  698. package/skills/write-paper/tests/test_placeholders.sh +107 -0
  699. package/skills/write-protocol/SKILL.md +243 -0
  700. package/skills/write-protocol/references/ethics_checklist.md +150 -0
  701. package/skills/write-protocol/references/protocol_template.md +304 -0
  702. package/skills/write-protocol/skill.yml +34 -0
@@ -0,0 +1,44 @@
1
+ schema_version: 2
2
+ name: verify-refs
3
+ layer: A
4
+ owner_domain: reference_integrity
5
+ when_to_use:
6
+ - Audit-only verification of manuscript references against PubMed and CrossRef
7
+ - Pre-submission citation hallucination check (PostToolUse hook trigger on circulation/submission docx)
8
+ - Detecting author hallucination (DOI real but a cited author name wrong at any position — full-author cross-check against PubMed efetch, v1.3.0)
9
+ - LLM-assisted drafting gate — `--strict` mode required when AI generated or rewrote citations
10
+ when_NOT_to_use:
11
+ - Adding new references (use /search-lit + /lit-sync)
12
+ - Rendering references list (use /manage-refs render_pandoc.sh)
13
+ - Modifying refs.bib or library.bib (audit-only — never writes back)
14
+ inputs:
15
+ - manuscript.md
16
+ - manuscript.docx
17
+ - references.bib
18
+ outputs:
19
+ - qc/reference_audit.json
20
+ deterministic_scripts:
21
+ - scripts/verify_refs.py
22
+ - scripts/verify_cli.sh
23
+ side_effects:
24
+ - writes_project_artifacts
25
+ downstream_consumers:
26
+ - write-paper
27
+ - sync-submission
28
+ - orchestrate
29
+ forbidden_actions:
30
+ - generate_references_from_memory
31
+ - silently_include_unverified_references
32
+
33
+ # v2.1 quality card
34
+ purpose: "Audit-only verification of manuscript references against PubMed and CrossRef (full-author cross-check); writes qc/reference_audit.json. Does not modify references."
35
+ safety_boundaries:
36
+ - "Audit-only: never edits references/ or refs.bib; never generates references from memory."
37
+ - "Unverified references are flagged, not silently included."
38
+ known_limitations:
39
+ - "Confirms DOI/PMID and author identity, not topical appropriateness of the citation."
40
+ - "CrossRef given-name errors are possible; PubMed efetch is treated as authoritative."
41
+ validation_commands:
42
+ - "bash scripts/verify_cli.sh <refs.bib>"
43
+ - "confirm qc/reference_audit.json submission_safe: true"
44
+ evidence_surface: bundled_script
@@ -0,0 +1,17 @@
1
+ @article{methodref_inpress,
2
+ title = {A method paper still in press},
3
+ author = {Smith, John and Doe, Jane},
4
+ year = {2026},
5
+ journal = {Journal of Examples},
6
+ pages = {e000--e000},
7
+ note = {in press}
8
+ }
9
+
10
+ @article{normalref_2025,
11
+ title = {A normal complete reference},
12
+ author = {Jones, Alice and Brown, Bob},
13
+ year = {2025},
14
+ journal = {Journal of Examples},
15
+ volume = {12},
16
+ pages = {123--130}
17
+ }
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Regression test for verify-refs Gate 6 (pagination-placeholder detection).
#
# Offline (no network): a bib entry whose pages are "e000--e000" with an
# "in press" note must get note="pagination_placeholder"; a normal entry must
# not. verify-refs stays manuscript-agnostic — it only flags; the P0/centrality
# call is /self-review's.
#
# Requirements: python3 (stdlib only).
# Exit status:  0 when every check passes, otherwise the number of failed
#               checks; 2 when the environment is unusable (ENV-ERR).
set -u

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SCRIPT="$HERE/../scripts/verify_refs.py"
BIB="$HERE/fixtures/pagination_placeholder.bib"

# Environment problems are reported as ENV-ERR (exit 2) so they are not
# mistaken for a regression in the code under test.
[[ -f "$SCRIPT" ]] || { echo "ENV-ERR: script missing" >&2; exit 2; }
[[ -f "$BIB" ]] || { echo "ENV-ERR: fixture missing" >&2; exit 2; }
command -v python3 >/dev/null 2>&1 \
  || { echo "ENV-ERR: python3 not found" >&2; exit 2; }

# Scratch project root; removed on every exit path.
ROOT="$(mktemp -d -t vrp_XXXX)" || { echo "ENV-ERR: mktemp failed" >&2; exit 2; }
trap 'rm -rf -- "$ROOT"' EXIT

fail=0

#######################################
# Run a command silently and report PASS/FAIL for it.
# Globals:   fail (incremented when the command fails)
# Arguments: $1 - label to print; remaining args - command to run
# Outputs:   one " PASS <label>" or " FAIL <label>" line on stdout
#######################################
check() {
  local label="$1"
  shift
  if "$@" >/dev/null 2>&1; then
    printf ' PASS %s\n' "$label"
  else
    printf ' FAIL %s\n' "$label"
    fail=$((fail + 1))
  fi
}

# The exit status of the run is deliberately not checked here; the check below
# on the audit file is what detects a run that produced no output.
# NOTE(review): presumably the script may exit non-zero when it reports
# unverified references — confirm against verify_refs.py.
python3 "$SCRIPT" "$BIB" --project-root "$ROOT" --offline >/dev/null 2>&1
AUDIT="$ROOT/qc/reference_audit.json"
check "audit JSON written" test -s "$AUDIT"

#######################################
# Evaluate a Python assertion against the audit JSON.
# The snippet runs with `d` (the parsed audit) and `recs` (records keyed by
# ref_id) in scope.
# Globals:   AUDIT (read)
# Arguments: $1 - Python statement(s), typically a single `assert`
# Returns:   python3's exit status (non-zero when the assertion fails)
#######################################
assert_py() {
  # The audit path travels via argv rather than being spliced into the Python
  # source, so a temp path containing quotes or backslashes cannot break it.
  python3 -c "
import json, sys
with open(sys.argv[1]) as fh:
    d = json.load(fh)
recs = {r['ref_id']: r for r in d['records']}
$1
" "$AUDIT"
}

# `or ''` keeps the membership tests valid if a record carries note=null.
check "placeholder entry flagged note=pagination_placeholder" \
  assert_py "assert 'pagination_placeholder' in (recs['methodref_inpress'].get('note') or ''), recs['methodref_inpress']"
check "placeholder entry status UNVERIFIED" \
  assert_py "assert recs['methodref_inpress']['status']=='UNVERIFIED', recs['methodref_inpress']['status']"
check "normal entry NOT flagged" \
  assert_py "assert 'pagination_placeholder' not in (recs['normalref_2025'].get('note') or ''), recs['normalref_2025']"

echo "fail=$fail"
if [[ "$fail" -eq 0 ]]; then
  echo "ALL PASS"
else
  echo "FAILURES: $fail"
fi
exit "$fail"
@@ -0,0 +1,143 @@
1
+ ---
2
+ name: version-dataset
3
+ description: Dataset version control for research reproducibility. Builds a deterministic content-hash manifest of a dataset (file SHA-256 + tabular schema + per-column value hashes), verifies a later copy against it to detect drift (schema change, row-count change, value changes), and diffs two manifests. Use to prove an analysis ran on the intended data, lock a dataset version, or reproducibility-lock bundled demos.
4
+ triggers: version dataset, dataset version, data manifest, data hash, dataset drift, reproducibility lock, verify dataset, data provenance, did my data change, manifest.lock
5
+ tools: Read, Write, Edit, Bash, Grep, Glob
6
+ model: inherit
7
+ ---
8
+
9
+ # Version Dataset Skill
10
+
11
+ You help a medical researcher put a dataset under version control: fingerprint it,
12
+ detect when it changes, and lock a reproducible version. This guards the
13
+ data-integrity rule — an analysis must run on the data it claims to, with a fixed
14
+ seed — by making any drift between runs loud instead of silent.
15
+
16
+ ## Communication Rules
17
+
18
+ - Communicate with the user in their preferred language.
19
+ - Manifest fields, drift reports, and provenance notes are in English.
20
+
21
+ ## Philosophy
22
+
23
+ A dataset is an input to a result; if it changes silently, every downstream
24
+ number is suspect. This skill records a deterministic fingerprint (file SHA-256 plus,
25
+ for tabular files, schema and per-column value hashes) so a later run can *prove*
26
+ the inputs are unchanged. It does not alter data, and it records nothing
27
+ non-deterministic (no timestamps unless explicitly passed), so the same data
28
+ always yields the same manifest.
29
+
30
+ ## Reference Files
31
+
32
+ - **Manifest schema + workflow**: `${CLAUDE_SKILL_DIR}/references/manifest_schema.md` —
33
+ the manifest.json structure, what each drift category means, and the non-
34
+ deterministic-artifact policy (PPTX/DOCX timestamps). Read before interpreting drift.
35
+
36
+ ## Deterministic Script
37
+
38
+ ```bash
39
+ # Build a manifest (record the analysis seed + provenance)
40
+ python "${CLAUDE_SKILL_DIR}/scripts/version_dataset.py" manifest data.csv \
41
+ --out manifest.json --seed 42 --provenance "KNHANES 2018 extract v1"
42
+
43
+ # Verify a later copy against it (CI / pre-analysis gate)
44
+ python "${CLAUDE_SKILL_DIR}/scripts/version_dataset.py" verify --manifest manifest.json --strict
45
+
46
+ # Compare two manifests (what changed between versions)
47
+ python "${CLAUDE_SKILL_DIR}/scripts/version_dataset.py" diff --old v1.json --new v2.json
48
+ ```
49
+
50
+ File hashing is stdlib-only; tabular schema/column hashing uses pandas when present.
51
+ `--ignore-cols` excludes volatile columns; `--base` makes manifest keys relative.
52
+
53
+ ## Workflow
54
+
55
+ ### Step 1: Lock the version (gate)
56
+
57
+ Build the manifest at the moment the dataset is frozen for analysis. **Gate:**
58
+ confirm with the user the seed and provenance note are correct before locking —
59
+ the manifest is the record they will cite as "this is the data the results came from."
60
+
61
+ ### Step 2: Verify before each run (gate)
62
+
63
+ Before re-running an analysis (or in CI), run `verify --strict`. **Gate:** if drift is
64
+ reported, stop and show the user the drift report; do not proceed on changed data
65
+ without their explicit acknowledgement and a re-lock. Silent re-run on drifted data
66
+ is the failure this skill exists to prevent.
67
+
68
+ ### Step 3: Diff across versions
69
+
70
+ When a dataset is intentionally updated, `diff` the old and new manifests and
71
+ present the change set (added/removed/changed columns, row-count delta) so the
72
+ user can record what changed and re-lock. **Gate:** the user approves the new
73
+ version before it replaces the locked one.
74
+
75
+ ## Non-Deterministic Artifacts
76
+
77
+ Some outputs (PPTX/DOCX with embedded timestamps, figures with render metadata)
78
+ change byte-for-byte on every build even when the analysis is identical. Do not
79
+ put these under strict byte verification — manifest only the deterministic inputs
80
+ and tabular outputs (data files, result CSVs), or use `--ignore-cols` for volatile
81
+ columns. See references for the policy.
82
+
83
+ ## Scope Limitations
84
+
85
+ ### Supported
86
+ - Content-hash manifest of any file; schema + per-column hashes for tabular files
87
+ (CSV/TSV/Parquet/Stata/SAS/Excel).
88
+ - Drift verification and manifest-to-manifest diff.
89
+
90
+ ### NOT Supported
91
+ - Storing or transmitting the data itself (manifests hold hashes, not contents).
92
+ - Cleaning, profiling, or de-identifying — use `/clean-data`, `/generate-codebook`, `/deidentify`.
93
+ - Full pipeline-output reproducibility for non-deterministic binaries (see above).
94
+
95
+ ## Cross-Skill Integration
96
+
97
+ - **/generate-codebook** documents *what* is in the data; version-dataset locks *which version*.
98
+ - **/deidentify** should run before a manifest is shared (example values are not stored, but provenance notes may carry context).
99
+ - Demo reproducibility: each bundled `demo/*/` carries a `manifest.lock.json` (input data + deterministic result tables) that `verify --strict` checks.
100
+
101
+ ## Worked Example
102
+
103
+ Lock a freshly-frozen extract:
104
+
105
+ ```bash
106
+ python "${CLAUDE_SKILL_DIR}/scripts/version_dataset.py" manifest cohort.csv \
107
+ --out manifest.json --seed 42 --provenance "KNHANES 2018 extract, frozen 2026-05"
108
+ # -> {"files": 1, "out": "manifest.json"}
109
+ ```
110
+
111
+ Before re-running the analysis next month:
112
+
113
+ ```bash
114
+ python "${CLAUDE_SKILL_DIR}/scripts/version_dataset.py" verify --manifest manifest.json --strict
115
+ # OK: 1 file(s) match the manifest. (exit 0 — safe to run)
116
+ ```
117
+
118
+ If someone silently re-exported the data with three extra rows:
119
+
120
+ ```text
121
+ =========================================
122
+ Dataset Manifest Verify
123
+ =========================================
124
+ DRIFT (3):
125
+ ROW COUNT cohort.csv: 3457 -> 3460
126
+ CHANGED column cohort.csv:bmi
127
+ CHANGED column cohort.csv:hba1c
128
+ MANIFEST_DRIFT: dataset differs from manifest. (exit 1 — STOP)
129
+ ```
130
+
131
+ The analysis does **not** proceed: the result the manuscript will cite would no
132
+ longer match the locked data. The researcher reviews the drift, decides whether
133
+ the change is intended, and only then re-locks (`manifest` again) and records the
134
+ new provenance. A tabular file is compared on its **logical content** (schema +
135
+ per-column value hashes), not raw bytes — re-saving the same data, reordering
136
+ columns, or changing a volatile timestamp column excluded via `--ignore-cols` does not trigger false drift.
137
+
138
+ ## Anti-Hallucination
139
+
140
+ - Never claim a dataset is unchanged without running `verify`.
141
+ - Manifests record only observed hashes/schema; no provenance is invented — the
142
+ `provenance` note is user-supplied text.
143
+ - Report drift exactly as computed; do not downplay a changed column hash.
@@ -0,0 +1,72 @@
1
+ # Manifest Schema & Drift Categories
2
+
3
+ `version_dataset.py` produces a deterministic `manifest.json`. This documents the
4
+ structure, the drift categories `verify`/`diff` report, and the non-deterministic
5
+ artifact policy.
6
+
7
+ ## manifest.json schema (schema_version 1)
8
+
9
+ ```jsonc
10
+ {
11
+ "schema_version": 1,
12
+ "seed": 42, // analysis seed, user-supplied (null if none)
13
+ "provenance": "KNHANES 2018 v1", // user-supplied note (null if none)
14
+ "stamp": "2026-05-01", // optional: the key is absent unless --stamp is passed
15
+ "files": {
16
+ "data/cohort.csv": {
17
+ "sha256": "…", // byte hash of the file
18
+ "bytes": 12345,
19
+ "tabular": { // present only for CSV/TSV/Parquet/Stata/SAS/Excel
20
+ "n_rows": 200,
21
+ "n_cols": 9,
22
+ "column_hashes": {"age": "…", "bmi": "…"} // sha256 of the column's literal
23
+ // cell strings (row order)
24
+ }
25
+ }
26
+ }
27
+ }
28
+ ```
29
+
30
+ Determinism: no timestamp is written unless `--stamp` is passed, so the same bytes
31
+ always yield the same manifest. `--base` stores file keys relative to a directory
32
+ (portable manifests); `--ignore-cols` omits volatile columns from `column_hashes`.
33
+
34
+ ## Drift categories (verify / diff)
35
+
36
+ | Category | Meaning |
37
+ |---|---|
38
+ | `CHANGED bytes: F` | A **non-tabular** file's SHA-256 differs. Tabular files are compared on logical content (below), not raw bytes, since re-save / float formatting / an `--ignore-cols` column would otherwise produce spurious byte drift. |
39
+ | `MISSING file: F` | F was in the manifest but is absent now. |
40
+ | `UNEXPECTED file: F` | F is present now but not in the manifest. |
41
+ | `ROW COUNT F: a -> b` | Tabular row count changed. |
42
+ | `ADDED column F:c` / `REMOVED column F:c` | Schema change. |
43
+ | `CHANGED column F:c` | Column c's cell values changed (cells are compared as literal strings; the pandas dtype is not part of the hash), even if row count is stable. |
44
+
45
+ `verify --strict` exits non-zero if any drift is found; without `--strict` it
46
+ reports and exits 0 (for advisory runs).
47
+
48
+ ## Non-deterministic artifact policy
49
+
50
+ Byte-for-byte hashing is correct for data files and result tables (CSV), but
51
+ **not** for artifacts that embed timestamps or render metadata:
52
+
53
+ - **PPTX / DOCX** embed creation/modification timestamps → hash changes every build.
54
+ - **PDF / PNG figures** may embed render metadata.
55
+
56
+ Policy: manifest only the **deterministic** surface — input data files and
57
+ tabular result outputs. Do not put PPTX/DOCX/figure binaries under `verify --strict`.
58
+ For tabular files with a volatile column (e.g. an export timestamp column), use
59
+ `--ignore-cols <name>` so the rest of the table is still verified.
60
+
61
+ ## Demo reproducibility (codex Improvement E)
62
+
63
+ Each bundled `demo/<name>/manifest.lock.json` fingerprints the demo's input data
64
+ and deterministic result tables. Verify a demo reproduces with:
65
+
66
+ ```bash
67
+ python skills/version-dataset/scripts/version_dataset.py verify \
68
+ --manifest demo/01_wisconsin_bc/manifest.lock.json --base demo/01_wisconsin_bc --strict
69
+ ```
70
+
71
+ The locks intentionally exclude the manuscript `.docx` and `.pptx` (timestamped)
72
+ and cover the input dataset plus the `analysis/tables/*.csv` outputs.
@@ -0,0 +1,242 @@
1
+ #!/usr/bin/env python3
2
+ """Dataset version control: content-hash manifest, drift verification, and diff.
3
+
4
+ Records a deterministic fingerprint of a dataset (or any analysis artifact) so a
5
+ later run can prove the inputs/outputs are unchanged, and so two versions can be
6
+ compared. Serves two needs:
7
+
8
+ 1. Dataset versioning for research — detect that an extract changed between
9
+ analysis runs (schema drift, row-count change, value changes) instead of
10
+ silently re-running on different data (data-integrity rule, seed=42).
11
+ 2. Reproducibility lock for bundled demos — hash input data + deterministic
12
+ outputs into a manifest.lock so CI can verify a demo still reproduces.
13
+
14
+ Subcommands:
15
+ manifest <paths...> --out FILE build a manifest (file SHA-256 + tabular schema)
16
+ verify --manifest FILE [--base DIR] recompute and compare; --strict exits non-zero on drift
17
+ diff --old A --new B compare two manifests
18
+
19
+ File-level SHA-256 works with the stdlib alone. Tabular schema/column hashing
20
+ uses pandas when available; without it, files are still hashed at the byte level.
21
+ Deterministic by design: no timestamps are written unless passed via --stamp.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import argparse
27
+ import hashlib
28
+ import json
29
+ import sys
30
+ from pathlib import Path
31
+
32
+ try:
33
+ import pandas as pd
34
+ _HAVE_PANDAS = True
35
+ except ImportError:
36
+ _HAVE_PANDAS = False
37
+
38
+ TABULAR = {".csv", ".tsv", ".parquet", ".pq", ".dta", ".sas7bdat", ".xlsx"}
39
+
40
+
41
def file_sha256(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at ``path``.

    The file is opened in binary mode and consumed in 1 MiB reads, so the
    whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(1 << 20)
            if not block:
                # read() returns b"" at end of file.
                break
            digest.update(block)
    return digest.hexdigest()
47
+
48
+
49
+ def _read_str(path: Path):
50
+ """Read a tabular file with every cell as its literal string.
51
+
52
+ Hashing must depend only on the data, not on the reader's environment. Native
53
+ dtype inference (int64 vs int32, object vs string, NaN coercion, float repr)
54
+ varies across pandas versions and platforms, which made manifests fail to
55
+ reproduce in CI. Reading CSV/TSV with dtype=str + keep_default_na=False
56
+ captures the exact textual content; other formats are read then stringified.
57
+ """
58
+ suf = path.suffix.lower()
59
+ if suf in (".csv", ".txt"):
60
+ return pd.read_csv(path, dtype=str, keep_default_na=False)
61
+ if suf == ".tsv":
62
+ return pd.read_csv(path, sep="\t", dtype=str, keep_default_na=False)
63
+ if suf in (".parquet", ".pq"):
64
+ df = pd.read_parquet(path)
65
+ elif suf == ".dta":
66
+ df = pd.read_stata(path)
67
+ elif suf == ".sas7bdat":
68
+ df = pd.read_sas(path)
69
+ elif suf == ".xlsx":
70
+ df = pd.read_excel(path, dtype=str)
71
+ else:
72
+ return None
73
+ return df.astype(str)
74
+
75
+
76
def column_hashes(path: Path, ignore_cols: set[str]) -> dict | None:
    """Compute per-column content hashes for a tabular file.

    Returns ``None`` (the caller then relies on the byte hash alone) when
    pandas is unavailable, the suffix is not in ``TABULAR``, the reader raises,
    or the reader does not recognise the format.

    Otherwise returns a dict with:
      * ``n_rows``  - number of data rows,
      * ``n_cols``  - number of columns in the file (ignored columns included),
      * ``column_hashes`` - ``{column name: sha256 hex}`` for every column not
        listed in ``ignore_cols``.
    """
    if not _HAVE_PANDAS:
        return None
    if path.suffix.lower() not in TABULAR:
        return None

    try:
        table = _read_str(path)
    except Exception:
        # Best effort: an unreadable table is simply not described as tabular.
        return None
    if table is None:
        return None

    # Each column is hashed over its cell strings in row order, joined with the
    # ASCII record separator. The pandas dtype is intentionally not hashed.
    digests = {
        str(name): hashlib.sha256(
            "\x1e".join(table[name].tolist()).encode("utf-8")
        ).hexdigest()
        for name in table.columns
        if name not in ignore_cols
    }
    return {
        "n_rows": int(len(table)),
        "n_cols": int(table.shape[1]),
        "column_hashes": digests,
    }
98
+
99
+
100
def build_entry(path: Path, ignore_cols: set[str]) -> dict:
    """Build the manifest record for one file.

    The record always carries the file's byte-level ``sha256`` and its size in
    ``bytes``. A ``tabular`` section is added only when ``column_hashes``
    returns a description for the file.
    """
    digest = file_sha256(path)
    size = path.stat().st_size
    record = {"sha256": digest, "bytes": size}

    tabular = column_hashes(path, ignore_cols)
    if tabular is None:
        return record
    record["tabular"] = tabular
    return record
106
+
107
+
108
def cmd_manifest(args: argparse.Namespace) -> int:
    """Build a manifest for ``args.paths`` and write it to ``args.out``.

    Reads from ``args``: ``paths``, ``out``, ``base``, ``seed``, ``provenance``,
    ``stamp`` and ``ignore_cols``.

    Returns:
        0 on success (a one-line JSON summary is printed to stdout);
        2 when an input path is not a file, or when ``--base`` is given and an
        input does not live under it (message on stderr, nothing is written).
    """
    ignore = set(args.ignore_cols or [])
    files = sorted(Path(p) for p in args.paths)
    missing = [str(p) for p in files if not p.is_file()]
    if missing:
        print(f"ERROR: not found: {', '.join(missing)}", file=sys.stderr)
        return 2

    base = Path(args.base).resolve() if args.base else None

    # Resolve every manifest key BEFORE hashing anything: relative_to() raises
    # ValueError for a path outside --base, and that should be a clean usage
    # error rather than a traceback halfway through the run.
    keyed = []
    outside = []
    for p in files:
        if base is None:
            keyed.append((p.as_posix(), p))
            continue
        try:
            key = p.resolve().relative_to(base).as_posix()
        except ValueError:
            outside.append(str(p))
            continue
        keyed.append((key, p))
    if outside:
        print(f"ERROR: not under --base {base}: {', '.join(outside)}", file=sys.stderr)
        return 2

    entries = {}
    for key, p in keyed:
        entries[key] = build_entry(p, ignore)

    manifest = {
        "schema_version": 1,
        "seed": args.seed,
        "provenance": args.provenance,
        "files": entries,
    }
    # The stamp key is only present on request, so default manifests stay
    # deterministic for identical inputs.
    if args.stamp:
        manifest["stamp"] = args.stamp
    out = Path(args.out)
    out.write_text(json.dumps(manifest, indent=2, ensure_ascii=False), encoding="utf-8")
    print(json.dumps({"files": len(entries), "out": str(out)}, indent=2))
    return 0
132
+
133
+
134
+ def _compare(expected: dict, actual: dict) -> list[str]:
135
+ drift: list[str] = []
136
+ exp_files, act_files = expected.get("files", {}), actual.get("files", {})
137
+ for name in sorted(set(exp_files) | set(act_files)):
138
+ if name not in act_files:
139
+ drift.append(f"MISSING file: {name}")
140
+ continue
141
+ if name not in exp_files:
142
+ drift.append(f"UNEXPECTED file: {name}")
143
+ continue
144
+ e, a = exp_files[name], act_files[name]
145
+ et, at = e.get("tabular"), a.get("tabular")
146
+ if et and at:
147
+ # Tabular: compare LOGICAL content (schema + column hashes), not raw
148
+ # bytes. Byte hash is over-sensitive (re-save, float formatting, an
149
+ # --ignore-cols column) and the column hashes fully characterize the
150
+ # data; only flag a byte change for non-tabular files below.
151
+ if et["n_rows"] != at["n_rows"]:
152
+ drift.append(f"ROW COUNT {name}: {et['n_rows']} -> {at['n_rows']}")
153
+ ec, ac = set(et["column_hashes"]), set(at["column_hashes"])
154
+ for col in sorted(ec - ac):
155
+ drift.append(f"REMOVED column {name}:{col}")
156
+ for col in sorted(ac - ec):
157
+ drift.append(f"ADDED column {name}:{col}")
158
+ for col in sorted(ec & ac):
159
+ if et["column_hashes"][col] != at["column_hashes"][col]:
160
+ drift.append(f"CHANGED column {name}:{col}")
161
+ else:
162
+ # Non-tabular (or no longer readable as tabular): byte hash is the
163
+ # only available signal.
164
+ if e.get("sha256") != a.get("sha256"):
165
+ drift.append(f"CHANGED bytes: {name}")
166
+ return drift
167
+
168
+
169
def cmd_verify(args: argparse.Namespace) -> int:
    """Re-fingerprint the files named in a manifest and report any drift.

    Reads from ``args``: ``manifest`` (path to the JSON manifest), ``base``
    (optional directory the manifest keys are resolved against),
    ``ignore_cols`` and ``strict``.

    Returns:
        0 when nothing drifted, or when drift was found without ``--strict``;
        1 when drift was found and ``--strict`` was given.
    """
    expected = json.loads(Path(args.manifest).read_text(encoding="utf-8"))
    ignore = set(args.ignore_cols or [])
    base = Path(args.base).resolve() if args.base else None

    recorded = expected.get("files", {})
    actual_files = {}
    for name in recorded:
        target = (base / name) if base else Path(name)
        if target.is_file():
            actual_files[name] = build_entry(target, ignore)
        else:
            # Absent file: keep the key with a null hash placeholder.
            actual_files[name] = {"sha256": None}

    drift = _compare(expected, {"files": actual_files})

    rule = "=" * 41
    print(rule)
    print(" Dataset Manifest Verify")
    print(rule)

    if drift:
        print(f"DRIFT ({len(drift)}):")
        for line in drift:
            print(f" {line}")
        if not args.strict:
            print("\n(non-strict: reported only; rerun with --strict to fail.)")
            return 0
        # The failure marker goes to stderr; the drift list above is on stdout.
        print("\nMANIFEST_DRIFT: dataset differs from manifest.", file=sys.stderr)
        return 1

    print(f"OK: {len(recorded)} file(s) match the manifest.")
    return 0
196
+
197
+
198
def cmd_diff(args: argparse.Namespace) -> int:
    """Print the differences between the manifests at ``args.old`` and ``args.new``.

    Both files are read as UTF-8 JSON. Always returns 0, whether or not
    differences were found.
    """
    manifests = [
        json.loads(Path(source).read_text(encoding="utf-8"))
        for source in (args.old, args.new)
    ]
    changes = _compare(manifests[0], manifests[1])
    if changes:
        print(f"Differences ({len(changes)}):")
        for line in changes:
            print(f" {line}")
    else:
        print("No differences between manifests.")
    return 0
209
+
210
+
211
def main() -> int:
    """Parse the command line and run the selected subcommand.

    Three subcommands are registered (``manifest``, ``verify``, ``diff``); each
    stores its handler as ``func`` on the parsed namespace. The handler's
    return value is returned as the process exit status.
    """
    parser = argparse.ArgumentParser(description="Dataset version control: manifest / verify / diff.")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # manifest: fingerprint one or more files.
    build = commands.add_parser("manifest", help="Build a content-hash manifest.")
    build.add_argument("paths", nargs="+", help="Data/artifact files to fingerprint.")
    build.add_argument("--out", default="manifest.json", help="Output manifest path.")
    build.add_argument("--base", help="Base dir; manifest keys are paths relative to it.")
    build.add_argument("--seed", type=int, default=None, help="Analysis seed to record (e.g. 42).")
    build.add_argument("--provenance", default=None, help="Free-text provenance note.")
    build.add_argument(
        "--stamp",
        default=None,
        help="Optional timestamp string to record (omitted by default for determinism).",
    )
    build.add_argument("--ignore-cols", nargs="*", help="Column names excluded from hashing.")
    build.set_defaults(func=cmd_manifest)

    # verify: recompute fingerprints and compare with a stored manifest.
    check = commands.add_parser("verify", help="Recompute and compare against a manifest.")
    check.add_argument("--manifest", required=True)
    check.add_argument("--base", help="Base dir for resolving manifest file keys.")
    check.add_argument("--ignore-cols", nargs="*")
    check.add_argument("--strict", action="store_true", help="Exit non-zero on any drift.")
    check.set_defaults(func=cmd_verify)

    # diff: compare two stored manifests.
    compare = commands.add_parser("diff", help="Compare two manifests.")
    compare.add_argument("--old", required=True)
    compare.add_argument("--new", required=True)
    compare.set_defaults(func=cmd_diff)

    parsed = parser.parse_args()
    handler = parsed.func
    return handler(parsed)
239
+
240
+
241
+ if __name__ == "__main__":
242
+ sys.exit(main())
@@ -0,0 +1,35 @@
1
+ schema_version: 2
2
+ name: version-dataset
3
+ layer: A
4
+ owner_domain: dataset_versioning
5
+
6
+ when_to_use: "Version-control a dataset with SHA-256 manifests for reproducibility."
7
+ when_NOT_to_use: "Cleaning the data (use clean-data); documenting variables (use generate-codebook)."
8
+
9
+ inputs:
10
+ - "analysis dataset"
11
+ - "prior manifest (optional)"
12
+ outputs:
13
+ - "dataset manifest.lock with SHA-256 hashes"
14
+ deterministic_scripts:
15
+ - scripts/version_dataset.py
16
+ side_effects:
17
+ - writes_manifest_artifacts
18
+ downstream_consumers:
19
+ - analyze-stats
20
+ - self-review
21
+ forbidden_actions:
22
+ - alter_data_to_match_a_manifest
23
+ - report_verification_pass_without_running_verify
24
+
25
+ # v2.1 quality card
26
+ purpose: "Pin a dataset's contents with SHA-256 manifests so analyses are reproducible and drift is detectable."
27
+ safety_boundaries:
28
+ - "Hashes are computed from the actual files; a manifest is never edited to force a pass."
29
+ - "Verification is deterministic and re-runnable in CI."
30
+ known_limitations:
31
+ - "Detects content drift (byte-level for non-tabular files, schema and column values for tabular files), not semantic correctness of the data."
32
+ - "A manifest is only as trustworthy as the moment it was locked."
33
+ validation_commands:
34
+ - "python3 scripts/version_dataset.py verify --manifest <lock> --strict"
35
+ evidence_surface: ci_validator
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env bash
2
+ # Regression tests for version-dataset/scripts/version_dataset.py.
3
+ # Self-contained: builds synthetic CSVs (no committed data).
4
+
5
# No -e on purpose: the checks below capture non-zero exit codes themselves.
set -uo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
VS="$REPO_ROOT/skills/version-dataset/scripts/version_dataset.py"
# Without -e a failed mktemp would leave TMP empty and every fixture below
# would be written to "/<name>"; treat it as an environment error instead.
TMP="$(mktemp -d -t versionds.XXXXXX)" || { echo "ENV-ERR: mktemp failed" >&2; exit 2; }
trap 'rm -rf "$TMP"' EXIT

[[ -f "$VS" ]] || { echo "ENV-ERR: script missing" >&2; exit 2; }

# Counters updated by check(): failed checks / checks run.
fail=0; ran=0
15
#######################################
# Record one check result and print a PASS/FAIL line.
# Globals:   ran (incremented), fail (incremented on mismatch)
# Arguments: $1 - label, $2 - expected value, $3 - actual value
# Outputs:   one result line on stdout
#######################################
check() {
  local label="$1" expected="$2" actual="$3"
  ran=$((ran + 1))
  if [[ "$expected" != "$actual" ]]; then
    printf ' FAIL %-48s expected=%s actual=%s\n' "$label" "$expected" "$actual"
    fail=$((fail + 1))
    return
  fi
  printf ' PASS %-48s %s\n' "$label" "$actual"
}
21
# Run the given command with all output discarded and print its exit code.
ec() {
  local status=0
  "$@" >/dev/null 2>&1 || status=$?
  echo "$status"
}
22
+
23
# Baseline fixture: three data rows.
printf 'id,age,grp\n1,50,A\n2,61,B\n3,47,A\n' > "$TMP/d.csv"

# manifest builds (exit 0)
check "manifest build" 0 "$(ec python3 "$VS" manifest "$TMP/d.csv" --out "$TMP/m.json" --seed 42 --provenance test)"
check "manifest file written" 0 "$([[ -f "$TMP/m.json" ]] && echo 0 || echo 1)"

# verify clean (exit 0)
check "verify clean --strict" 0 "$(ec python3 "$VS" verify --manifest "$TMP/m.json" --strict)"

# mutate a value -> drift (exit 1) + CHANGED column reported
printf 'id,age,grp\n1,50,A\n2,99,B\n3,47,A\n' > "$TMP/d.csv"
check "verify value-change --strict" 1 "$(ec python3 "$VS" verify --manifest "$TMP/m.json" --strict)"
out="$(python3 "$VS" verify --manifest "$TMP/m.json" 2>&1)"
check "drift reports CHANGED column age" 0 "$([[ "$out" == *"CHANGED column"*":age"* ]] && echo 0 || echo 1)"
check "non-strict drift exits 0" 0 "$(ec python3 "$VS" verify --manifest "$TMP/m.json")"

# add a row -> new manifest + diff reports ROW COUNT
# The rebuild is checked explicitly so a failed build is reported as such
# instead of surfacing only as a confusing diff failure.
printf 'id,age,grp\n1,50,A\n2,99,B\n3,47,A\n4,55,C\n' > "$TMP/d.csv"
check "manifest rebuild after row add" 0 "$(ec python3 "$VS" manifest "$TMP/d.csv" --out "$TMP/m2.json")"
dout="$(python3 "$VS" diff --old "$TMP/m.json" --new "$TMP/m2.json" 2>&1)"
check "diff reports ROW COUNT 3 -> 4" 0 "$([[ "$dout" == *"ROW COUNT"*"3 -> 4"* ]] && echo 0 || echo 1)"

# --ignore-cols excludes a volatile column from hashing
printf 'id,age,ts\n1,50,t1\n2,61,t2\n' > "$TMP/v.csv"
check "manifest build --ignore-cols" 0 "$(ec python3 "$VS" manifest "$TMP/v.csv" --out "$TMP/vm.json" --ignore-cols ts)"
printf 'id,age,ts\n1,50,t9\n2,61,t8\n' > "$TMP/v.csv" # only ts changes
check "verify ignores volatile col" 0 "$(ec python3 "$VS" verify --manifest "$TMP/vm.json" --ignore-cols ts --strict)"

printf '\n%d/%d checks passed\n' "$((ran-fail))" "$ran"
[[ "$fail" -eq 0 ]] || exit 1