medsci-skills 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +50 -0
- package/README.md +602 -0
- package/README_FIRST.md +27 -0
- package/bin/medsci-skills.js +159 -0
- package/installers/install-macos.command +19 -0
- package/installers/install-windows.cmd +26 -0
- package/installers/install-windows.ps1 +17 -0
- package/installers/install.py +218 -0
- package/metadata/skills_catalog.json +452 -0
- package/package.json +48 -0
- package/skills/academic-aio/SKILL.md +408 -0
- package/skills/academic-aio/references/case_studies/kjr_mllm_2025.md +82 -0
- package/skills/academic-aio/references/checklists/AIO_GENERAL.md +354 -0
- package/skills/academic-aio/references/journal_summarybox_templates.yaml +126 -0
- package/skills/academic-aio/references/oac_funding_checklist.yaml +129 -0
- package/skills/academic-aio/references/reporting_guideline_mapping.md +39 -0
- package/skills/academic-aio/references/schema_markup_templates/CodeRepository.jsonld +32 -0
- package/skills/academic-aio/references/schema_markup_templates/Dataset.jsonld +36 -0
- package/skills/academic-aio/references/schema_markup_templates/Person.jsonld +30 -0
- package/skills/academic-aio/references/schema_markup_templates/README.md +43 -0
- package/skills/academic-aio/references/schema_markup_templates/ScholarlyArticle.jsonld +55 -0
- package/skills/academic-aio/scripts/batch_metadata_audit.py +169 -0
- package/skills/academic-aio/scripts/validate_schema.py +118 -0
- package/skills/academic-aio/skill.yml +36 -0
- package/skills/academic-aio/templates/aio_audit_checklist.md.j2 +108 -0
- package/skills/add-journal/SKILL.md +482 -0
- package/skills/add-journal/skill.yml +33 -0
- package/skills/analyze-stats/SKILL.md +598 -0
- package/skills/analyze-stats/references/analysis_guides/missing_data.md +109 -0
- package/skills/analyze-stats/references/analysis_guides/nhis_icd10_mapping.md +247 -0
- package/skills/analyze-stats/references/analysis_guides/propensity_score.md +132 -0
- package/skills/analyze-stats/references/analysis_guides/regression.md +115 -0
- package/skills/analyze-stats/references/analysis_guides/repeated_measures.md +160 -0
- package/skills/analyze-stats/references/analysis_guides/survey_weighted.md +366 -0
- package/skills/analyze-stats/references/analysis_guides/test_selection.md +86 -0
- package/skills/analyze-stats/references/style/figure_style.mplstyle +69 -0
- package/skills/analyze-stats/references/style/theme_publication.R +147 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/ajr.yaml +51 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/european_radiology.yaml +55 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/jama.yaml +66 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/lancet.yaml +57 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/nejm.yaml +51 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/radiology.yaml +66 -0
- package/skills/analyze-stats/references/table-standards/table-standards.md +287 -0
- package/skills/analyze-stats/references/table-standards/table-types/diagnostic_accuracy.md +36 -0
- package/skills/analyze-stats/references/table-standards/table-types/meta_analysis.md +58 -0
- package/skills/analyze-stats/references/table-standards/table-types/model_comparison.md +36 -0
- package/skills/analyze-stats/references/table-standards/table-types/regression_results.md +50 -0
- package/skills/analyze-stats/references/table-standards/table-types/table1_demographics.md +51 -0
- package/skills/analyze-stats/references/table-standards/tool-comparison.md +79 -0
- package/skills/analyze-stats/references/templates/agreement_analysis.py +436 -0
- package/skills/analyze-stats/references/templates/dca_plot.R +237 -0
- package/skills/analyze-stats/references/templates/diagnostic_accuracy.py +401 -0
- package/skills/analyze-stats/references/templates/dta_meta_analysis.R +384 -0
- package/skills/analyze-stats/references/templates/forest_plot.py +412 -0
- package/skills/analyze-stats/references/templates/likert_summary.py +356 -0
- package/skills/analyze-stats/references/templates/meta_analysis.R +365 -0
- package/skills/analyze-stats/references/templates/propensity_score.py +478 -0
- package/skills/analyze-stats/references/templates/regression.py +425 -0
- package/skills/analyze-stats/references/templates/repeated_measures.py +434 -0
- package/skills/analyze-stats/references/templates/sample_size.R +382 -0
- package/skills/analyze-stats/references/templates/survey_weighted_analysis.py +411 -0
- package/skills/analyze-stats/references/templates/survival_analysis.py +325 -0
- package/skills/analyze-stats/references/templates/table1_demographics.py +287 -0
- package/skills/analyze-stats/scripts/check_generated_code.py +335 -0
- package/skills/analyze-stats/skill.yml +38 -0
- package/skills/analyze-stats/tests/fixtures/gen_bad.R +16 -0
- package/skills/analyze-stats/tests/fixtures/gen_bad.py +24 -0
- package/skills/analyze-stats/tests/fixtures/gen_clean.py +21 -0
- package/skills/analyze-stats/tests/test_generated_code.sh +59 -0
- package/skills/analyze-stats/tests/test_survival_template.sh +53 -0
- package/skills/author-strategy/SKILL.md +117 -0
- package/skills/author-strategy/analyze_patterns.py +303 -0
- package/skills/author-strategy/fetch_pubmed.py +374 -0
- package/skills/author-strategy/skill.yml +34 -0
- package/skills/batch-cohort/SKILL.md +223 -0
- package/skills/batch-cohort/references/base_template_knhanes.R +210 -0
- package/skills/batch-cohort/references/batch_template_generator.R +222 -0
- package/skills/batch-cohort/references/variable_coding_registry.md +136 -0
- package/skills/batch-cohort/skill.yml +35 -0
- package/skills/calc-sample-size/SKILL.md +491 -0
- package/skills/calc-sample-size/references/formulas.md +655 -0
- package/skills/calc-sample-size/references/observational_cohort.md +49 -0
- package/skills/calc-sample-size/skill.yml +51 -0
- package/skills/check-reporting/SKILL.md +534 -0
- package/skills/check-reporting/references/LICENSES.md +41 -0
- package/skills/check-reporting/references/checklists/AMSTAR2.md +54 -0
- package/skills/check-reporting/references/checklists/ARRIVE_2.md +234 -0
- package/skills/check-reporting/references/checklists/CARE.md +102 -0
- package/skills/check-reporting/references/checklists/CLAIM_2024.md +128 -0
- package/skills/check-reporting/references/checklists/CLEAR.md +113 -0
- package/skills/check-reporting/references/checklists/CONSORT.md +86 -0
- package/skills/check-reporting/references/checklists/COSMIN_RoB.md +136 -0
- package/skills/check-reporting/references/checklists/GRRAS.md +61 -0
- package/skills/check-reporting/references/checklists/MI_CLEAR_LLM.md +167 -0
- package/skills/check-reporting/references/checklists/MOOSE.md +85 -0
- package/skills/check-reporting/references/checklists/NOS.md +88 -0
- package/skills/check-reporting/references/checklists/PRISMA_2020.md +135 -0
- package/skills/check-reporting/references/checklists/PRISMA_DTA.md +36 -0
- package/skills/check-reporting/references/checklists/PRISMA_P.md +56 -0
- package/skills/check-reporting/references/checklists/PROBAST.md +75 -0
- package/skills/check-reporting/references/checklists/PROBAST_AI.md +130 -0
- package/skills/check-reporting/references/checklists/QUADAS2.md +77 -0
- package/skills/check-reporting/references/checklists/QUADAS_C.md +131 -0
- package/skills/check-reporting/references/checklists/ROBINS_E.md +179 -0
- package/skills/check-reporting/references/checklists/ROBINS_I.md +87 -0
- package/skills/check-reporting/references/checklists/ROBIS.md +114 -0
- package/skills/check-reporting/references/checklists/ROB_ME.md +126 -0
- package/skills/check-reporting/references/checklists/RoB2.md +79 -0
- package/skills/check-reporting/references/checklists/RoB_NMA.md +96 -0
- package/skills/check-reporting/references/checklists/SPIRIT.md +112 -0
- package/skills/check-reporting/references/checklists/SQUIRE_2.md +68 -0
- package/skills/check-reporting/references/checklists/STARD.md +129 -0
- package/skills/check-reporting/references/checklists/STARD_AI.md +211 -0
- package/skills/check-reporting/references/checklists/STROBE.md +80 -0
- package/skills/check-reporting/references/checklists/SWiM.md +33 -0
- package/skills/check-reporting/references/checklists/TRIPOD.md +157 -0
- package/skills/check-reporting/references/checklists/TRIPOD_AI.md +140 -0
- package/skills/check-reporting/references/step4c_registration_timing.md +93 -0
- package/skills/check-reporting/references/step4d_prisma_figure_audit.md +137 -0
- package/skills/check-reporting/scripts/check_checklist_exists.py +183 -0
- package/skills/check-reporting/scripts/check_checklist_version.py +168 -0
- package/skills/check-reporting/scripts/check_framework_naming.py +206 -0
- package/skills/check-reporting/scripts/check_prisma_figure.py +209 -0
- package/skills/check-reporting/scripts/prisma_cascade_check.py +274 -0
- package/skills/check-reporting/skill.yml +41 -0
- package/skills/check-reporting/tests/fixtures/framework_bad.md +8 -0
- package/skills/check-reporting/tests/fixtures/framework_clean.md +7 -0
- package/skills/check-reporting/tests/test_checklist_fail_fast.sh +77 -0
- package/skills/check-reporting/tests/test_checklist_version.sh +72 -0
- package/skills/check-reporting/tests/test_framework_naming.sh +45 -0
- package/skills/check-reporting/tests/test_prisma_cascade.sh +104 -0
- package/skills/clean-data/SKILL.md +180 -0
- package/skills/clean-data/references/cleaning_patterns.md +299 -0
- package/skills/clean-data/references/profiling_template.py +304 -0
- package/skills/clean-data/scripts/check_structural_zero.py +174 -0
- package/skills/clean-data/skill.yml +35 -0
- package/skills/clean-data/tests/fixtures/smoking.csv +8 -0
- package/skills/clean-data/tests/test_structural_zero.sh +49 -0
- package/skills/cross-national/SKILL.md +264 -0
- package/skills/cross-national/skill.yml +37 -0
- package/skills/define-variables/SKILL.md +146 -0
- package/skills/define-variables/references/common_definitions.md +190 -0
- package/skills/define-variables/skill.yml +34 -0
- package/skills/define-variables/templates/variable_operationalization.md +64 -0
- package/skills/deidentify/SKILL.md +203 -0
- package/skills/deidentify/deidentify.py +1224 -0
- package/skills/deidentify/locales/_template.json +45 -0
- package/skills/deidentify/locales/au.json +43 -0
- package/skills/deidentify/locales/ca.json +44 -0
- package/skills/deidentify/locales/cn.json +47 -0
- package/skills/deidentify/locales/de.json +48 -0
- package/skills/deidentify/locales/fr.json +48 -0
- package/skills/deidentify/locales/in.json +48 -0
- package/skills/deidentify/locales/jp.json +48 -0
- package/skills/deidentify/locales/kr.json +48 -0
- package/skills/deidentify/locales/uk.json +45 -0
- package/skills/deidentify/locales/us.json +43 -0
- package/skills/deidentify/references/date_shift_guide.md +82 -0
- package/skills/deidentify/references/hipaa_18_identifiers.md +48 -0
- package/skills/deidentify/references/korean_phi_patterns.md +135 -0
- package/skills/deidentify/skill.yml +43 -0
- package/skills/deidentify/tests/README.md +26 -0
- package/skills/deidentify/tests/test_clean.csv +16 -0
- package/skills/deidentify/tests/test_edge_cases.csv +11 -0
- package/skills/deidentify/tests/test_phi_korean.csv +11 -0
- package/skills/design-ai-benchmarking/SKILL.md +214 -0
- package/skills/design-ai-benchmarking/references/benchmark_export_schema.json +69 -0
- package/skills/design-ai-benchmarking/references/elicitation_rubric_template.md +37 -0
- package/skills/design-ai-benchmarking/skill.yml +38 -0
- package/skills/design-study/SKILL.md +298 -0
- package/skills/design-study/skill.yml +33 -0
- package/skills/fill-icmje-coi/SKILL.md +216 -0
- package/skills/fill-icmje-coi/scripts/fill_icmje_coi.py +140 -0
- package/skills/fill-icmje-coi/skill.yml +35 -0
- package/skills/fill-icmje-coi/templates/icmje_coi_seed_synthetic.docx +0 -0
- package/skills/fill-protocol/SKILL.md +248 -0
- package/skills/fill-protocol/examples/example_irb_template.yaml +53 -0
- package/skills/fill-protocol/references/best_practices.md +121 -0
- package/skills/fill-protocol/scripts/doc_to_docx.py +111 -0
- package/skills/fill-protocol/scripts/fill_form.py +611 -0
- package/skills/fill-protocol/scripts/inspect_template.py +61 -0
- package/skills/fill-protocol/setup.sh +162 -0
- package/skills/fill-protocol/skill.yml +37 -0
- package/skills/find-cohort-gap/SKILL.md +309 -0
- package/skills/find-cohort-gap/references/cohort_profile_template.md +93 -0
- package/skills/find-cohort-gap/references/onepager_template.md +84 -0
- package/skills/find-cohort-gap/references/pattern_scoring_rubric.md +169 -0
- package/skills/find-cohort-gap/references/saturation_query_templates.md +143 -0
- package/skills/find-cohort-gap/skill.yml +35 -0
- package/skills/find-journal/POLICY.md +87 -0
- package/skills/find-journal/SKILL.md +340 -0
- package/skills/find-journal/references/journal_profiles/AJNR.md +29 -0
- package/skills/find-journal/references/journal_profiles/AJR.md +30 -0
- package/skills/find-journal/references/journal_profiles/Abdominal_Radiology.md +30 -0
- package/skills/find-journal/references/journal_profiles/Academic_Radiology.md +30 -0
- package/skills/find-journal/references/journal_profiles/Annals_of_Internal_Medicine.md +33 -0
- package/skills/find-journal/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +28 -0
- package/skills/find-journal/references/journal_profiles/BMC_Medicine.md +31 -0
- package/skills/find-journal/references/journal_profiles/British_Journal_of_Radiology.md +39 -0
- package/skills/find-journal/references/journal_profiles/CVIR.md +30 -0
- package/skills/find-journal/references/journal_profiles/Chest.md +39 -0
- package/skills/find-journal/references/journal_profiles/Clinical_Radiology.md +30 -0
- package/skills/find-journal/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +32 -0
- package/skills/find-journal/references/journal_profiles/Diabetes_Metabolism_Journal.md +36 -0
- package/skills/find-journal/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +32 -0
- package/skills/find-journal/references/journal_profiles/Endocrinology_and_Metabolism.md +37 -0
- package/skills/find-journal/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +39 -0
- package/skills/find-journal/references/journal_profiles/European_Radiology.md +29 -0
- package/skills/find-journal/references/journal_profiles/Hepatology_Communications.md +40 -0
- package/skills/find-journal/references/journal_profiles/Hepatology_International.md +37 -0
- package/skills/find-journal/references/journal_profiles/IEEE_JBHI.md +28 -0
- package/skills/find-journal/references/journal_profiles/IEEE_TMI.md +28 -0
- package/skills/find-journal/references/journal_profiles/INSI.md +29 -0
- package/skills/find-journal/references/journal_profiles/Investigative_Radiology.md +25 -0
- package/skills/find-journal/references/journal_profiles/JACC_Advances.md +41 -0
- package/skills/find-journal/references/journal_profiles/JACC_Asia.md +30 -0
- package/skills/find-journal/references/journal_profiles/JACR.md +28 -0
- package/skills/find-journal/references/journal_profiles/JAMA.md +40 -0
- package/skills/find-journal/references/journal_profiles/JAMA_Network_Open.md +30 -0
- package/skills/find-journal/references/journal_profiles/JCSM.md +39 -0
- package/skills/find-journal/references/journal_profiles/JKMS.md +32 -0
- package/skills/find-journal/references/journal_profiles/JMIR.md +29 -0
- package/skills/find-journal/references/journal_profiles/JMIR_Medical_Education.md +29 -0
- package/skills/find-journal/references/journal_profiles/JNIS.md +35 -0
- package/skills/find-journal/references/journal_profiles/JVIR.md +31 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Biomedical_Informatics.md +29 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +40 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Magnetic_Resonance_Imaging.md +30 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Nuclear_Medicine.md +31 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Stroke.md +32 -0
- package/skills/find-journal/references/journal_profiles/KJR.md +38 -0
- package/skills/find-journal/references/journal_profiles/Korean_Circulation_Journal.md +38 -0
- package/skills/find-journal/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +36 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Diabetes_and_Endocrinology.md +40 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +49 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Infectious_Diseases.md +38 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Neurology.md +39 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Oncology.md +40 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Psychiatry.md +38 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Public_Health.md +30 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Respiratory_Medicine.md +39 -0
- package/skills/find-journal/references/journal_profiles/Liver_International.md +33 -0
- package/skills/find-journal/references/journal_profiles/Medical_Image_Analysis.md +28 -0
- package/skills/find-journal/references/journal_profiles/NEJM.md +33 -0
- package/skills/find-journal/references/journal_profiles/Nature_Machine_Intelligence.md +31 -0
- package/skills/find-journal/references/journal_profiles/Nature_Medicine.md +39 -0
- package/skills/find-journal/references/journal_profiles/Neuroradiology.md +31 -0
- package/skills/find-journal/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +39 -0
- package/skills/find-journal/references/journal_profiles/PLOS_Medicine.md +32 -0
- package/skills/find-journal/references/journal_profiles/RYAI.md +28 -0
- package/skills/find-journal/references/journal_profiles/Radiology.md +29 -0
- package/skills/find-journal/references/journal_profiles/Skeletal_Radiology.md +31 -0
- package/skills/find-journal/references/journal_profiles/Stroke.md +37 -0
- package/skills/find-journal/references/journal_profiles/The_BMJ.md +31 -0
- package/skills/find-journal/references/journal_profiles/The_Lancet.md +31 -0
- package/skills/find-journal/references/journal_profiles/The_Lancet_Digital_Health.md +29 -0
- package/skills/find-journal/references/journal_profiles/World_Journal_of_Hepatology.md +53 -0
- package/skills/find-journal/references/journal_profiles/npj_Digital_Medicine.md +29 -0
- package/skills/find-journal/skill.yml +34 -0
- package/skills/fulltext-retrieval/SKILL.md +174 -0
- package/skills/fulltext-retrieval/fetch_oa.py +433 -0
- package/skills/fulltext-retrieval/pdf_to_md.py +160 -0
- package/skills/fulltext-retrieval/skill.yml +41 -0
- package/skills/generate-codebook/SKILL.md +155 -0
- package/skills/generate-codebook/references/codebook_schema.md +76 -0
- package/skills/generate-codebook/scripts/generate_codebook.py +278 -0
- package/skills/generate-codebook/skill.yml +35 -0
- package/skills/generate-codebook/tests/test_generate_codebook.sh +76 -0
- package/skills/grant-builder/SKILL.md +251 -0
- package/skills/grant-builder/skill.yml +34 -0
- package/skills/humanize/SKILL.md +251 -0
- package/skills/humanize/references/ai_patterns.md +571 -0
- package/skills/humanize/skill.yml +33 -0
- package/skills/intake-project/SKILL.md +264 -0
- package/skills/intake-project/skill.yml +34 -0
- package/skills/lit-sync/SKILL.md +448 -0
- package/skills/lit-sync/references/locale/ko/note_templates.md +110 -0
- package/skills/lit-sync/skill.yml +52 -0
- package/skills/lit-sync/tests/test_poll_logic.sh +92 -0
- package/skills/ma-scout/SKILL.md +640 -0
- package/skills/ma-scout/references/project_readme_template.md +95 -0
- package/skills/ma-scout/references/project_readme_template_ko.md +82 -0
- package/skills/ma-scout/skill.yml +33 -0
- package/skills/make-figures/SKILL.md +957 -0
- package/skills/make-figures/references/critic_rubrics/data_plot.md +166 -0
- package/skills/make-figures/references/critic_rubrics/flow_diagram.md +169 -0
- package/skills/make-figures/references/design_principles.md +181 -0
- package/skills/make-figures/references/exemplar_diagrams/README.md +65 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_input.yaml +37 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_output_600.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/other/other_02.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/other/other_02.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/other/other_02_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_input.yaml +47 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_output_600.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_input.yaml +40 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_output_600.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_input.yaml +43 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_input_pptx.yaml +43 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pptx +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output_600.png +0 -0
- package/skills/make-figures/references/figure_specs.md +291 -0
- package/skills/make-figures/references/flow_diagram_lessons.md +164 -0
- package/skills/make-figures/references/jacc_central_illustration_principles.md +91 -0
- package/skills/make-figures/references/medical_illustration_sources.md +98 -0
- package/skills/make-figures/references/pipeline_concepts_medical_ai.md +240 -0
- package/skills/make-figures/references/reporting_guideline_figure_map.md +104 -0
- package/skills/make-figures/references/visual_abstract_templates/european_radiology.pptx +0 -0
- package/skills/make-figures/references/visual_abstract_templates/jacc_central_illustration.pptx +0 -0
- package/skills/make-figures/references/visual_abstract_templates/medsci_default.pptx +0 -0
- package/skills/make-figures/references/visual_abstract_templates/template_guide.md +114 -0
- package/skills/make-figures/scripts/build_jacc_template.py +77 -0
- package/skills/make-figures/scripts/build_prisma2020_template.py +371 -0
- package/skills/make-figures/scripts/build_strobe_template.py +351 -0
- package/skills/make-figures/scripts/critic_figure.py +264 -0
- package/skills/make-figures/scripts/derive_figure_legend_counts.py +138 -0
- package/skills/make-figures/scripts/extract_exemplar_from_pdf.py +186 -0
- package/skills/make-figures/scripts/fetch_official_templates.sh +88 -0
- package/skills/make-figures/scripts/fill_prisma_template.py +142 -0
- package/skills/make-figures/scripts/generate_flow_diagram.R +133 -0
- package/skills/make-figures/scripts/generate_image.py +99 -0
- package/skills/make-figures/scripts/generate_visual_abstract.py +438 -0
- package/skills/make-figures/scripts/validate_pptx_mac_compat.py +233 -0
- package/skills/make-figures/skill.yml +52 -0
- package/skills/make-figures/templates/official/NOTES.md +62 -0
- package/skills/make-figures/templates/official/consort2010/CONSORT_2025_editable_checklist.docx +0 -0
- package/skills/make-figures/templates/official/consort2010/CONSORT_2025_flow_diagram.docx +0 -0
- package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v1.pptx +0 -0
- package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v2.pptx +0 -0
- package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_updated_v2.pptx +0 -0
- package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_editable_checklist.docx +0 -0
- package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_participant_timeline.docx +0 -0
- package/skills/make-figures/templates/official/stard2015/STARD_2015_checklist.docx +0 -0
- package/skills/make-figures/templates/official/stard2015/STARD_2015_flow_diagram.pdf +0 -0
- package/skills/make-figures/tests/fixtures/figure1_flow.yaml +8 -0
- package/skills/make-figures/tests/fixtures/manuscript_ok.md +9 -0
- package/skills/make-figures/tests/fixtures/manuscript_stale.md +4 -0
- package/skills/make-figures/tests/test_legend_reconcile.sh +36 -0
- package/skills/manage-project/SKILL.md +358 -0
- package/skills/manage-project/references/pre_submission_checklist.md +53 -0
- package/skills/manage-project/references/project_state_template.json +37 -0
- package/skills/manage-project/references/scaffold_templates.md +118 -0
- package/skills/manage-project/references/status_output_format.md +44 -0
- package/skills/manage-project/references/timeline_example.md +20 -0
- package/skills/manage-project/skill.yml +36 -0
- package/skills/manage-project/templates/SSOT.yaml.template +41 -0
- package/skills/manage-refs/LICENSE.zotero-mcp +21 -0
- package/skills/manage-refs/NOTICE.md +29 -0
- package/skills/manage-refs/SKILL.md +289 -0
- package/skills/manage-refs/citation_styles/README.md +40 -0
- package/skills/manage-refs/citation_styles/american-journal-of-roentgenology.csl +211 -0
- package/skills/manage-refs/citation_styles/cardiovascular-and-interventional-radiology.csl +19 -0
- package/skills/manage-refs/citation_styles/european-radiology.csl +19 -0
- package/skills/manage-refs/citation_styles/journal-of-cachexia-sarcopenia-and-muscle.csl +150 -0
- package/skills/manage-refs/citation_styles/journal-of-korean-medical-science-strict.csl +533 -0
- package/skills/manage-refs/citation_styles/journal-of-korean-medical-science.csl +16 -0
- package/skills/manage-refs/citation_styles/korean-journal-of-radiology.csl +155 -0
- package/skills/manage-refs/citation_styles/nature.csl +189 -0
- package/skills/manage-refs/citation_styles/nlm-citation-sequence.csl +535 -0
- package/skills/manage-refs/citation_styles/radiology.csl +228 -0
- package/skills/manage-refs/citation_styles/springer-basic-brackets.csl +187 -0
- package/skills/manage-refs/citation_styles/springer-vancouver-brackets.csl +276 -0
- package/skills/manage-refs/citation_styles/vancouver-superscript.csl +536 -0
- package/skills/manage-refs/citation_styles/vancouver.csl +535 -0
- package/skills/manage-refs/references/REFERENCE_STYLE_SPECS.md +59 -0
- package/skills/manage-refs/references/check_xref_symptoms.md +35 -0
- package/skills/manage-refs/scripts/_vendor_citation_writer.py +600 -0
- package/skills/manage-refs/scripts/check_citation_keys.py +112 -0
- package/skills/manage-refs/scripts/check_csl_render.py +102 -0
- package/skills/manage-refs/scripts/check_xref.py +633 -0
- package/skills/manage-refs/scripts/fill_journal_abbrev.py +104 -0
- package/skills/manage-refs/scripts/inject_zotero_cwyw.py +133 -0
- package/skills/manage-refs/scripts/md_marker_convert.py +193 -0
- package/skills/manage-refs/scripts/pre_submission_gate.sh +238 -0
- package/skills/manage-refs/scripts/render_pandoc.sh +88 -0
- package/skills/manage-refs/skill.yml +70 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/README.md +32 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/manuscript.md +10 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/refs.bib +34 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/run.sh +117 -0
- package/skills/manage-refs/tests/test_vN_docx_check.sh +145 -0
- package/skills/meta-analysis/SKILL.md +739 -0
- package/skills/meta-analysis/references/LICENSES.md +21 -0
- package/skills/meta-analysis/references/PROSPERO_template.md +221 -0
- package/skills/meta-analysis/references/ai_pre_screening_template.py +245 -0
- package/skills/meta-analysis/references/checklists/JBI_Case_Series.md +45 -0
- package/skills/meta-analysis/references/checklists/NOS.md +88 -0
- package/skills/meta-analysis/references/checklists/PRISMA_DTA.md +36 -0
- package/skills/meta-analysis/references/checklists/PROBAST.md +75 -0
- package/skills/meta-analysis/references/checklists/QUADAS2.md +77 -0
- package/skills/meta-analysis/references/checklists/ROBINS_I.md +87 -0
- package/skills/meta-analysis/references/checklists/RoB2.md +79 -0
- package/skills/meta-analysis/references/data_integrity_checklist.md +57 -0
- package/skills/meta-analysis/references/icmje_coi_guide.md +181 -0
- package/skills/meta-analysis/references/phase10_recovery.md +136 -0
- package/skills/meta-analysis/references/phase4_km_composite.md +58 -0
- package/skills/meta-analysis/references/phase6_statistical_synthesis.md +148 -0
- package/skills/meta-analysis/references/phase9_circulation.md +84 -0
- package/skills/meta-analysis/references/post_submission_release_ops.md +41 -0
- package/skills/meta-analysis/references/r_templates.md +132 -0
- package/skills/meta-analysis/references/review_orchestration.md +40 -0
- package/skills/meta-analysis/references/submission_package_drift.md +71 -0
- package/skills/meta-analysis/scripts/check_pool_consistency.py +201 -0
- package/skills/meta-analysis/scripts/cohort_overlap_check.py +242 -0
- package/skills/meta-analysis/scripts/dta_extraction_qc.py +137 -0
- package/skills/meta-analysis/scripts/screening_reconcile.py +160 -0
- package/skills/meta-analysis/skill.yml +47 -0
- package/skills/meta-analysis/templates/FINAL_POOL_LOCK.yaml.template +70 -0
- package/skills/meta-analysis/templates/extraction_form_v2.md +129 -0
- package/skills/meta-analysis/templates/supplementary_8file_checklist.md +94 -0
- package/skills/meta-analysis/tests/test_pool_consistency.sh +123 -0
- package/skills/orchestrate/SKILL.md +501 -0
- package/skills/orchestrate/references/dialogue_nodes.md +196 -0
- package/skills/orchestrate/references/report_template.md +109 -0
- package/skills/orchestrate/references/report_template_ko.md +88 -0
- package/skills/orchestrate/skill.yml +44 -0
- package/skills/peer-review/SKILL.md +381 -0
- package/skills/peer-review/references/aczel_2021_reviewer2_patterns.md +88 -0
- package/skills/peer-review/references/domain-probes/ai_overclaiming.md +47 -0
- package/skills/peer-review/references/domain-probes/narrative_review.md +44 -0
- package/skills/peer-review/references/domain-probes/observational_confounding.md +48 -0
- package/skills/peer-review/references/domain-probes/radiomics.md +38 -0
- package/skills/peer-review/references/domain-probes/sr_ma.md +87 -0
- package/skills/peer-review/references/domain-probes/survival_prognostic.md +68 -0
- package/skills/peer-review/references/exemplar_reviews/README.md +43 -0
- package/skills/peer-review/references/exemplar_reviews/ai_overclaiming.md +47 -0
- package/skills/peer-review/references/exemplar_reviews/calibration_missing.md +44 -0
- package/skills/peer-review/references/exemplar_reviews/data_leakage.md +48 -0
- package/skills/peer-review/references/exemplar_reviews/reference_standard_validity.md +45 -0
- package/skills/peer-review/references/narrative_review_audit.md +67 -0
- package/skills/peer-review/references/reviewer_calibration/README.md +34 -0
- package/skills/peer-review/references/reviewer_calibration/compliance_floor.md +52 -0
- package/skills/peer-review/references/reviewer_profiles/AJR.md +82 -0
- package/skills/peer-review/references/reviewer_profiles/EURE.md +64 -0
- package/skills/peer-review/references/reviewer_profiles/INSI.md +57 -0
- package/skills/peer-review/references/reviewer_profiles/KJR.md +100 -0
- package/skills/peer-review/references/reviewer_profiles/README.md +32 -0
- package/skills/peer-review/references/reviewer_profiles/RYAI.md +86 -0
- package/skills/peer-review/skill.yml +39 -0
- package/skills/present-paper/SKILL.md +675 -0
- package/skills/present-paper/references/critic_rubrics/slide.md +155 -0
- package/skills/present-paper/references/generate_pptx_templates.py +604 -0
- package/skills/present-paper/references/medical_presentation_templates.md +277 -0
- package/skills/present-paper/references/slide_design_principles.md +202 -0
- package/skills/present-paper/references/slide_visual_styles/nature_lancet.md +168 -0
- package/skills/present-paper/references/workflow-checklist.md +109 -0
- package/skills/present-paper/scripts/extract_pdf_figures.py +243 -0
- package/skills/present-paper/scripts/inject_pronunciation_notes.py +178 -0
- package/skills/present-paper/scripts/inject_speaker_notes.py +133 -0
- package/skills/present-paper/scripts/strip_notes_for_sharing.py +140 -0
- package/skills/present-paper/scripts/trim_caption.py +271 -0
- package/skills/present-paper/skill.yml +41 -0
- package/skills/present-paper/templates/build_pptx_nature_lancet.py +688 -0
- package/skills/publish-skill/SKILL.md +370 -0
- package/skills/publish-skill/references/license-compatibility-matrix.md +132 -0
- package/skills/publish-skill/references/pii-patterns.md +130 -0
- package/skills/publish-skill/scripts/audit_skill.sh +278 -0
- package/skills/publish-skill/skill.yml +35 -0
- package/skills/render-pdf-doc/SKILL.md +146 -0
- package/skills/render-pdf-doc/references/known_pitfalls.md +53 -0
- package/skills/render-pdf-doc/references/pandoc_korean_cheatsheet.md +77 -0
- package/skills/render-pdf-doc/scripts/check_deps.sh +42 -0
- package/skills/render-pdf-doc/scripts/infer_colwidths.py +164 -0
- package/skills/render-pdf-doc/scripts/render_pdf.sh +98 -0
- package/skills/render-pdf-doc/skill.yml +57 -0
- package/skills/render-pdf-doc/templates/anchor-doc.md +27 -0
- package/skills/render-pdf-doc/templates/anchor-doc_ko.md +25 -0
- package/skills/render-pdf-doc/templates/briefing-handout.md +33 -0
- package/skills/render-pdf-doc/templates/briefing-handout_ko.md +31 -0
- package/skills/render-pdf-doc/templates/proposal-cover.md +33 -0
- package/skills/render-pdf-doc/templates/proposal-cover_ko.md +31 -0
- package/skills/render-pdf-doc/templates/reference-table.md +22 -0
- package/skills/render-pdf-doc/templates/reference-table_ko.md +20 -0
- package/skills/replicate-study/SKILL.md +150 -0
- package/skills/replicate-study/references/harmonization_3country.csv +47 -0
- package/skills/replicate-study/references/harmonization_knhanes_nhanes.csv +68 -0
- package/skills/replicate-study/references/methodology_extraction_template.md +134 -0
- package/skills/replicate-study/skill.yml +37 -0
- package/skills/review-paper/SKILL.md +104 -0
- package/skills/review-paper/references/macro_skeleton.md +6 -0
- package/skills/review-paper/skill.yml +25 -0
- package/skills/revise/SKILL.md +515 -0
- package/skills/revise/references/r2r_voice.md +346 -0
- package/skills/revise/skill.yml +43 -0
- package/skills/search-lit/SKILL.md +443 -0
- package/skills/search-lit/references/parse_pubmed.py +326 -0
- package/skills/search-lit/references/pubmed_eutils.sh +111 -0
- package/skills/search-lit/skill.yml +46 -0
- package/skills/self-review/SKILL.md +1045 -0
- package/skills/self-review/references/domain-probes/ai_overclaiming.md +47 -0
- package/skills/self-review/references/domain-probes/narrative_review.md +44 -0
- package/skills/self-review/references/domain-probes/observational_confounding.md +48 -0
- package/skills/self-review/references/domain-probes/radiomics.md +38 -0
- package/skills/self-review/references/domain-probes/sr_ma.md +87 -0
- package/skills/self-review/references/domain-probes/survival_prognostic.md +68 -0
- package/skills/self-review/references/exemplar_findings/README.md +43 -0
- package/skills/self-review/references/exemplar_findings/cohort_arithmetic_mismatch.md +35 -0
- package/skills/self-review/references/exemplar_findings/estimand_drift_posthoc_primary.md +39 -0
- package/skills/self-review/references/exemplar_findings/scope_overreach_cross_sectional.md +35 -0
- package/skills/self-review/references/exemplar_findings/unadjusted_confounder.md +36 -0
- package/skills/self-review/references/panel_review_template.md +177 -0
- package/skills/self-review/scripts/check_artifact_coverage.py +301 -0
- package/skills/self-review/scripts/check_claim_artifact.py +248 -0
- package/skills/self-review/scripts/check_classical_style.py +185 -0
- package/skills/self-review/scripts/check_cohort_arithmetic.py +481 -0
- package/skills/self-review/scripts/check_confounding_completeness.py +287 -0
- package/skills/self-review/scripts/check_panel_diversity.py +336 -0
- package/skills/self-review/scripts/check_reference_adequacy.py +392 -0
- package/skills/self-review/scripts/check_reviewer_team_consistency.py +412 -0
- package/skills/self-review/scripts/check_scope_coherence.py +177 -0
- package/skills/self-review/skill.yml +47 -0
- package/skills/self-review/tests/fixtures/claim_manuscript.md +17 -0
- package/skills/self-review/tests/fixtures/claim_prereg.md +6 -0
- package/skills/self-review/tests/fixtures/cohort_bad.md +21 -0
- package/skills/self-review/tests/fixtures/cohort_clean.md +21 -0
- package/skills/self-review/tests/fixtures/cohort_partition.csv +5 -0
- package/skills/self-review/tests/fixtures/coverage_analysis/31_delong_nested_added_value.csv +3 -0
- package/skills/self-review/tests/fixtures/coverage_analysis/table1_demographics.csv +3 -0
- package/skills/self-review/tests/fixtures/coverage_clean.md +13 -0
- package/skills/self-review/tests/fixtures/coverage_manuscript.md +11 -0
- package/skills/self-review/tests/fixtures/panel_collapse.json +27 -0
- package/skills/self-review/tests/fixtures/panel_good.json +32 -0
- package/skills/self-review/tests/fixtures/panel_monoculture.json +32 -0
- package/skills/self-review/tests/fixtures/refadeq_letter.md +13 -0
- package/skills/self-review/tests/fixtures/refadeq_original_fixed.md +42 -0
- package/skills/self-review/tests/fixtures/refadeq_original_uncited.md +40 -0
- package/skills/self-review/tests/fixtures/scope_bad.md +9 -0
- package/skills/self-review/tests/fixtures/scope_clean.md +8 -0
- package/skills/self-review/tests/fixtures/scope_surrogate.md +8 -0
- package/skills/self-review/tests/fixtures/style_bad.md +13 -0
- package/skills/self-review/tests/fixtures/style_clean.md +11 -0
- package/skills/self-review/tests/fixtures/table1_by_exposure.csv +11 -0
- package/skills/self-review/tests/test_artifact_coverage.sh +44 -0
- package/skills/self-review/tests/test_claim_artifact.sh +50 -0
- package/skills/self-review/tests/test_classical_style.sh +44 -0
- package/skills/self-review/tests/test_cohort_arithmetic.sh +49 -0
- package/skills/self-review/tests/test_confounding_completeness.sh +66 -0
- package/skills/self-review/tests/test_panel_diversity.sh +55 -0
- package/skills/self-review/tests/test_panel_mode.sh +69 -0
- package/skills/self-review/tests/test_reference_adequacy.sh +68 -0
- package/skills/self-review/tests/test_reviewer_team_consistency.sh +138 -0
- package/skills/self-review/tests/test_scope_coherence.sh +46 -0
- package/skills/setup-medsci/SKILL.md +110 -0
- package/skills/setup-medsci/references/setup-checklist.md +51 -0
- package/skills/setup-medsci/skill.yml +30 -0
- package/skills/sync-submission/SKILL.md +382 -0
- package/skills/sync-submission/scripts/author_registry_example.yaml +36 -0
- package/skills/sync-submission/scripts/blind_sweep.py +203 -0
- package/skills/sync-submission/scripts/check_asset_anonymization.py +300 -0
- package/skills/sync-submission/scripts/check_cross_artifact_stale.py +211 -0
- package/skills/sync-submission/scripts/cover_letter_drift_check.py +451 -0
- package/skills/sync-submission/scripts/cross_document_n_check.py +486 -0
- package/skills/sync-submission/scripts/detect_copy_divergence.py +136 -0
- package/skills/sync-submission/scripts/preflight_gate.py +458 -0
- package/skills/sync-submission/scripts/scope_drift_check.py +362 -0
- package/skills/sync-submission/scripts/sync_submission.py +169 -0
- package/skills/sync-submission/skill.yml +43 -0
- package/skills/sync-submission/tests/fixtures/copy_ok.md +5 -0
- package/skills/sync-submission/tests/fixtures/copy_stale.md +5 -0
- package/skills/sync-submission/tests/fixtures/ssot.md +5 -0
- package/skills/sync-submission/tests/test_asset_anonymization.sh +99 -0
- package/skills/sync-submission/tests/test_copy_divergence.sh +44 -0
- package/skills/sync-submission/tests/test_cross_artifact_stale.sh +80 -0
- package/skills/sync-submission/tests/test_cross_document_n.sh +132 -0
- package/skills/sync-submission/tests/test_preflight_gate.sh +112 -0
- package/skills/sync-submission/tests/test_scope_drift.sh +122 -0
- package/skills/sync-submission/tests/test_vN_docx_assertion.sh +51 -0
- package/skills/verify-refs/SKILL.md +177 -0
- package/skills/verify-refs/references/manual_checkpoint_guide.md +100 -0
- package/skills/verify-refs/scripts/verify_cli.sh +62 -0
- package/skills/verify-refs/scripts/verify_refs.py +782 -0
- package/skills/verify-refs/skill.yml +44 -0
- package/skills/verify-refs/tests/fixtures/pagination_placeholder.bib +17 -0
- package/skills/verify-refs/tests/test_pagination_placeholder.sh +42 -0
- package/skills/version-dataset/SKILL.md +143 -0
- package/skills/version-dataset/references/manifest_schema.md +72 -0
- package/skills/version-dataset/scripts/version_dataset.py +242 -0
- package/skills/version-dataset/skill.yml +35 -0
- package/skills/version-dataset/tests/test_version_dataset.sh +52 -0
- package/skills/write-paper/SKILL.md +1148 -0
- package/skills/write-paper/references/exemplar_methods/README.md +38 -0
- package/skills/write-paper/references/exemplar_methods/ai_validation_tripod_claim.md +47 -0
- package/skills/write-paper/references/exemplar_methods/diagnostic_accuracy_stard.md +50 -0
- package/skills/write-paper/references/exemplar_methods/observational_cohort_strobe.md +43 -0
- package/skills/write-paper/references/journal_profiles/AJNR.md +185 -0
- package/skills/write-paper/references/journal_profiles/AJR.md +149 -0
- package/skills/write-paper/references/journal_profiles/Abdominal_Radiology.md +139 -0
- package/skills/write-paper/references/journal_profiles/Academic_Radiology.md +90 -0
- package/skills/write-paper/references/journal_profiles/Annals_of_Internal_Medicine.md +150 -0
- package/skills/write-paper/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +82 -0
- package/skills/write-paper/references/journal_profiles/British_Journal_of_Radiology.md +161 -0
- package/skills/write-paper/references/journal_profiles/CVIR.md +157 -0
- package/skills/write-paper/references/journal_profiles/Chest.md +270 -0
- package/skills/write-paper/references/journal_profiles/Clinical_Radiology.md +160 -0
- package/skills/write-paper/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +147 -0
- package/skills/write-paper/references/journal_profiles/Diabetes_Metabolism_Journal.md +163 -0
- package/skills/write-paper/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +216 -0
- package/skills/write-paper/references/journal_profiles/Endocrinology_and_Metabolism.md +167 -0
- package/skills/write-paper/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +192 -0
- package/skills/write-paper/references/journal_profiles/European_Radiology.md +159 -0
- package/skills/write-paper/references/journal_profiles/Hepatology_Communications.md +110 -0
- package/skills/write-paper/references/journal_profiles/Hepatology_International.md +106 -0
- package/skills/write-paper/references/journal_profiles/IEEE_TMI.md +180 -0
- package/skills/write-paper/references/journal_profiles/INSI.md +163 -0
- package/skills/write-paper/references/journal_profiles/Investigative_Radiology.md +86 -0
- package/skills/write-paper/references/journal_profiles/JACC_Advances.md +197 -0
- package/skills/write-paper/references/journal_profiles/JACC_Asia.md +168 -0
- package/skills/write-paper/references/journal_profiles/JACR.md +87 -0
- package/skills/write-paper/references/journal_profiles/JAMA.md +188 -0
- package/skills/write-paper/references/journal_profiles/JAMA_Network_Open.md +170 -0
- package/skills/write-paper/references/journal_profiles/JCSM.md +266 -0
- package/skills/write-paper/references/journal_profiles/JKMS.md +201 -0
- package/skills/write-paper/references/journal_profiles/JMIR.md +88 -0
- package/skills/write-paper/references/journal_profiles/JMIR_Medical_Education.md +86 -0
- package/skills/write-paper/references/journal_profiles/JNIS.md +227 -0
- package/skills/write-paper/references/journal_profiles/JVIR.md +158 -0
- package/skills/write-paper/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +191 -0
- package/skills/write-paper/references/journal_profiles/Journal_of_Stroke.md +176 -0
- package/skills/write-paper/references/journal_profiles/KJR.md +185 -0
- package/skills/write-paper/references/journal_profiles/Korean_Circulation_Journal.md +184 -0
- package/skills/write-paper/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +178 -0
- package/skills/write-paper/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +127 -0
- package/skills/write-paper/references/journal_profiles/Liver_International.md +165 -0
- package/skills/write-paper/references/journal_profiles/Medical_Image_Analysis.md +147 -0
- package/skills/write-paper/references/journal_profiles/NEJM.md +147 -0
- package/skills/write-paper/references/journal_profiles/Nature_Medicine.md +181 -0
- package/skills/write-paper/references/journal_profiles/Neuroradiology.md +151 -0
- package/skills/write-paper/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +184 -0
- package/skills/write-paper/references/journal_profiles/PLOS_Medicine.md +166 -0
- package/skills/write-paper/references/journal_profiles/RYAI.md +124 -0
- package/skills/write-paper/references/journal_profiles/Radiology.md +173 -0
- package/skills/write-paper/references/journal_profiles/Skeletal_Radiology.md +135 -0
- package/skills/write-paper/references/journal_profiles/Stroke.md +210 -0
- package/skills/write-paper/references/journal_profiles/The_BMJ.md +121 -0
- package/skills/write-paper/references/journal_profiles/The_Lancet.md +112 -0
- package/skills/write-paper/references/journal_profiles/The_Lancet_Digital_Health.md +104 -0
- package/skills/write-paper/references/journal_profiles/World_Journal_of_Hepatology.md +106 -0
- package/skills/write-paper/references/journal_profiles/npj_Digital_Medicine.md +93 -0
- package/skills/write-paper/references/paper_types/ai_validation.md +270 -0
- package/skills/write-paper/references/paper_types/animal_study.md +194 -0
- package/skills/write-paper/references/paper_types/case_report.md +237 -0
- package/skills/write-paper/references/paper_types/cross_national.md +328 -0
- package/skills/write-paper/references/paper_types/letter.md +127 -0
- package/skills/write-paper/references/paper_types/meta_analysis.md +181 -0
- package/skills/write-paper/references/paper_types/nhis_cohort.md +297 -0
- package/skills/write-paper/references/paper_types/original_article.md +221 -0
- package/skills/write-paper/references/paper_types/technical_note.md +131 -0
- package/skills/write-paper/references/section_guides/discussion.md +155 -0
- package/skills/write-paper/references/section_guides/introduction.md +108 -0
- package/skills/write-paper/references/section_guides/methods.md +144 -0
- package/skills/write-paper/references/section_guides/results.md +113 -0
- package/skills/write-paper/references/section_guides/step7_1_classical_qc.md +67 -0
- package/skills/write-paper/references/section_guides/step7_4a_audit_recovery.md +74 -0
- package/skills/write-paper/references/section_guides/title_abstract.md +123 -0
- package/skills/write-paper/references/section_templates/methods_statistical.md +147 -0
- package/skills/write-paper/scripts/check_placeholders.py +182 -0
- package/skills/write-paper/skill.yml +48 -0
- package/skills/write-paper/tests/test_placeholders.sh +107 -0
- package/skills/write-protocol/SKILL.md +243 -0
- package/skills/write-protocol/references/ethics_checklist.md +150 -0
- package/skills/write-protocol/references/protocol_template.md +304 -0
- package/skills/write-protocol/skill.yml +34 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# Common Clinical Data Cleaning Patterns
|
|
2
|
+
|
|
3
|
+
Reference document for the clean-data skill. Covers recurring data quality issues
|
|
4
|
+
in electronic health records, registries, and research databases.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## 1. Missing Data Patterns
|
|
9
|
+
|
|
10
|
+
### Classification
|
|
11
|
+
|
|
12
|
+
- **MCAR (Missing Completely At Random)**: Missingness is unrelated to any variable.
|
|
13
|
+
Example: random equipment failure during lab measurement.
|
|
14
|
+
Test: Little's MCAR test (chi-square). If p > 0.05, MCAR is plausible.
|
|
15
|
+
|
|
16
|
+
- **MAR (Missing At Random)**: Missingness depends on observed variables but not the
|
|
17
|
+
missing value itself. Example: younger patients less likely to have bone density measured.
|
|
18
|
+
Cannot be directly tested; inferred from associations between missingness indicators
|
|
19
|
+
and observed covariates.
|
|
20
|
+
|
|
21
|
+
- **MNAR (Missing Not At Random)**: Missingness depends on the unobserved value itself.
|
|
22
|
+
Example: severely ill patients too sick to complete follow-up surveys.
|
|
23
|
+
Cannot be tested from the data alone; requires domain knowledge.
|
|
24
|
+
|
|
25
|
+
### Heuristic Assessment
|
|
26
|
+
|
|
27
|
+
1. Compute missing percentage per variable.
|
|
28
|
+
2. Create missingness indicator (0/1) for each variable with >5% missing.
|
|
29
|
+
3. Correlate missingness indicators with observed variables (chi-square, t-test).
|
|
30
|
+
4. If strong correlations exist: likely MAR. If none: plausible MCAR. If clinical
|
|
31
|
+
reasoning suggests the value itself drives missingness: suspect MNAR.
|
|
32
|
+
|
|
33
|
+
### When to Use Each Imputation Method
|
|
34
|
+
|
|
35
|
+
| Method | When appropriate | Caution |
|
|
36
|
+
|--------|-----------------|---------|
|
|
37
|
+
| Listwise deletion (complete case) | MCAR, low % missing (<5%), large sample | Biased if MAR/MNAR; reduces power |
|
|
38
|
+
| Mean/median imputation | Quick exploratory analysis only | Underestimates variance; distorts distributions |
|
|
39
|
+
| Last observation carried forward | Longitudinal data, slow-changing variables | Biased if trajectory is changing |
|
|
40
|
+
| Multiple imputation (MICE) | MAR, moderate missing (5-40%), multivariate | Requires careful model specification |
|
|
41
|
+
| Maximum likelihood (FIML) | MAR, SEM or regression contexts | Needs software support |
|
|
42
|
+
| Sensitivity analysis | Always for MNAR suspicion | Report results under multiple assumptions |
|
|
43
|
+
|
|
44
|
+
### Key Rule
|
|
45
|
+
|
|
46
|
+
Never impute the outcome variable in the primary analysis without explicit justification.
|
|
47
|
+
Report the missing data mechanism assumption in the methods section.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## 2. Outlier Detection
|
|
52
|
+
|
|
53
|
+
### Statistical Methods
|
|
54
|
+
|
|
55
|
+
**IQR Method (Tukey Fences)**:
|
|
56
|
+
- Lower fence: Q1 - 1.5 * IQR
|
|
57
|
+
- Upper fence: Q3 + 1.5 * IQR
|
|
58
|
+
- Robust to non-normal distributions
|
|
59
|
+
- Preferred for clinical data where normality is rarely guaranteed
|
|
60
|
+
|
|
61
|
+
**Z-Score Method**:
|
|
62
|
+
- Flag values with |z| > 3 (or |z| > 2.5 for smaller samples)
|
|
63
|
+
- Assumes approximate normality
|
|
64
|
+
- Sensitive to the outliers themselves (mean and SD are affected)
|
|
65
|
+
|
|
66
|
+
**Modified Z-Score (MAD-based)**:
|
|
67
|
+
- Uses median and Median Absolute Deviation instead of mean/SD
|
|
68
|
+
- More robust than standard Z-score
|
|
69
|
+
- Formula: M_i = 0.6745 * (x_i - median) / MAD; flag values with |M_i| > 3.5
|
|
70
|
+
|
|
71
|
+
### Decision Framework
|
|
72
|
+
|
|
73
|
+
| Scenario | Recommended action |
|
|
74
|
+
|----------|--------------------|
|
|
75
|
+
| Data entry error (clearly impossible) | Correct if source available; else set to missing |
|
|
76
|
+
| Measurement error (instrument fault) | Set to missing; document in cleaning log |
|
|
77
|
+
| True extreme value (biologically plausible) | Keep in dataset; consider sensitivity analysis with/without |
|
|
78
|
+
| Ambiguous | Flag for domain expert review; do not remove without justification |
|
|
79
|
+
|
|
80
|
+
### Clinical Context Matters
|
|
81
|
+
|
|
82
|
+
A BMI of 50 is an outlier statistically but clinically real. An age of 200 is impossible.
|
|
83
|
+
A creatinine of 15 mg/dL is extreme but occurs in dialysis patients. Always consult the
|
|
84
|
+
codebook and clinical context before removing outliers.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 3. Duplicate Detection
|
|
89
|
+
|
|
90
|
+
### Exact Duplicates
|
|
91
|
+
|
|
92
|
+
- Identical across ALL columns.
|
|
93
|
+
- Usually safe to remove (keep first occurrence).
|
|
94
|
+
- Common cause: accidental double-submission or ETL errors.
|
|
95
|
+
|
|
96
|
+
### Near-Duplicates
|
|
97
|
+
|
|
98
|
+
- Same patient identifier, different records.
|
|
99
|
+
- May be legitimate (multiple visits) or errors (same visit entered twice with typos).
|
|
100
|
+
|
|
101
|
+
### Detection Strategy
|
|
102
|
+
|
|
103
|
+
1. Check for exact row duplicates: `df.duplicated().sum()`
|
|
104
|
+
2. Check for duplicate patient IDs: `df['patient_id'].duplicated().sum()`
|
|
105
|
+
3. For near-duplicates: group by patient ID, sort by date, check for records within
|
|
106
|
+
a suspiciously short time window (e.g., same day for what should be annual visits).
|
|
107
|
+
4. Fuzzy matching: consider Levenshtein distance on name fields if no unique ID exists.
|
|
108
|
+
|
|
109
|
+
### Resolution
|
|
110
|
+
|
|
111
|
+
- Exact duplicates: drop duplicates, log count.
|
|
112
|
+
- Same-patient near-duplicates: present to researcher for manual review.
|
|
113
|
+
- Never auto-merge patient records without explicit approval.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## 4. Date Handling
|
|
118
|
+
|
|
119
|
+
### Common Date Formats in Clinical Data
|
|
120
|
+
|
|
121
|
+
| Format | Example | Source |
|
|
122
|
+
|--------|---------|--------|
|
|
123
|
+
| YYYY-MM-DD | 2024-03-15 | ISO 8601, most databases |
|
|
124
|
+
| MM/DD/YYYY | 03/15/2024 | US clinical systems |
|
|
125
|
+
| DD/MM/YYYY | 15/03/2024 | European systems |
|
|
126
|
+
| YYYYMMDD | 20240315 | DICOM, HL7 |
|
|
127
|
+
| DD-Mon-YYYY | 15-Mar-2024 | Some EMR exports |
|
|
128
|
+
| Excel serial | 45366 | Excel numeric date |
|
|
129
|
+
|
|
130
|
+
### Common Issues
|
|
131
|
+
|
|
132
|
+
- **Ambiguous dates**: Is 03/04/2024 March 4th or April 3rd? Check the data source locale.
|
|
133
|
+
Look for values >12 in the first or second position to disambiguate.
|
|
134
|
+
- **Impossible dates**: February 30, month 13, year 0001.
|
|
135
|
+
- **Future dates**: Dates after the data extraction date (except for scheduled appointments).
|
|
136
|
+
- **Timezone issues**: Rarely relevant for clinical research dates, but critical for timestamps
|
|
137
|
+
in multi-site studies across time zones.
|
|
138
|
+
- **Two-digit years**: 24 could be 1924 or 2024. Use a pivot year (e.g., 30: <=30 means 2000s,
|
|
139
|
+
>30 means 1900s) or infer from context.
|
|
140
|
+
|
|
141
|
+
### Standardization
|
|
142
|
+
|
|
143
|
+
1. Parse all date columns to datetime using `pd.to_datetime(col, format=..., errors='coerce')`.
|
|
144
|
+
2. Check for NaT (failed parses) and investigate.
|
|
145
|
+
3. Standardize to ISO 8601 (YYYY-MM-DD) for storage.
|
|
146
|
+
4. Calculate derived variables (age at event, follow-up duration) from standardized dates.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## 5. Category Harmonization
|
|
151
|
+
|
|
152
|
+
### Common Inconsistencies
|
|
153
|
+
|
|
154
|
+
| Raw values | Harmonized |
|
|
155
|
+
|-----------|-----------|
|
|
156
|
+
| "Male", "male", "M", "MALE", " Male " | "Male" |
|
|
157
|
+
| "Y", "Yes", "yes", "YES", "1", "True" | 1 or "Yes" |
|
|
158
|
+
| "Right", "Rt", "R", "right", "RT" | "Right" |
|
|
159
|
+
| "Non-small cell", "NSCLC", "non small cell" | "NSCLC" |
|
|
160
|
+
|
|
161
|
+
### Harmonization Steps
|
|
162
|
+
|
|
163
|
+
1. Strip whitespace: `series.str.strip()`
|
|
164
|
+
2. Normalize case: `series.str.lower()` or `series.str.title()`
|
|
165
|
+
3. Build a mapping dictionary for known synonyms.
|
|
166
|
+
4. Review unmapped values manually.
|
|
167
|
+
5. Apply mapping: `series.map(mapping_dict).fillna(series)`
|
|
168
|
+
|
|
169
|
+
### Encoding Standards
|
|
170
|
+
|
|
171
|
+
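- **ICD-10**: Diagnosis codes. Watch for variant differences (WHO ICD-10 vs national modifications such as ICD-10-CM); note that ICD-10-PCS is a separate US procedure coding system, not a diagnosis code set.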
|
|
172
|
+
- **SNOMED CT**: Clinical terminology. More granular than ICD-10.
|
|
173
|
+
- **LOINC**: Laboratory observations. Use for standardizing lab test names.
|
|
174
|
+
- **CPT/HCPCS**: Procedure codes.
|
|
175
|
+
|
|
176
|
+
When possible, map free-text categories to standard coding systems. Document the mapping
|
|
177
|
+
table and include it in supplementary materials.
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## 6. Common Clinical Data Pitfalls
|
|
182
|
+
|
|
183
|
+
### Lab Values with Inequality Prefixes
|
|
184
|
+
|
|
185
|
+
Values like "<0.01", ">10000", "<=5" are common for lab results at detection limits.
|
|
186
|
+
|
|
187
|
+
**Handling options**:
|
|
188
|
+
- Replace with the limit value: "<0.01" -> 0.01 (conservative)
|
|
189
|
+
- Replace with half the limit: "<0.01" -> 0.005 (common in environmental studies)
|
|
190
|
+
- Replace with limit / sqrt(2): "<0.01" -> 0.00707 (EPA method)
|
|
191
|
+
- Keep as censored data and use appropriate statistical methods (Tobit regression)
|
|
192
|
+
|
|
193
|
+
Document the chosen method in the statistical analysis plan.
|
|
194
|
+
|
|
195
|
+
### Mixed Units
|
|
196
|
+
|
|
197
|
+
Common in multi-site studies or data merged from different systems.
|
|
198
|
+
|
|
199
|
+
| Analyte | Unit A | Unit B | Conversion |
|
|
200
|
+
|---------|--------|--------|-----------|
|
|
201
|
+
| Glucose | mg/dL | mmol/L | mg/dL = mmol/L * 18.018 |
|
|
202
|
+
| Creatinine | mg/dL | umol/L | mg/dL = umol/L / 88.4 |
|
|
203
|
+
| Hemoglobin | g/dL | g/L | g/dL = g/L / 10 |
|
|
204
|
+
| Calcium | mg/dL | mmol/L | mg/dL = mmol/L * 4.008 |
|
|
205
|
+
|
|
206
|
+
**Detection**: look for bimodal distributions in lab values -- one mode per unit system.
|
|
207
|
+
|
|
208
|
+
### Sentinel Values
|
|
209
|
+
|
|
210
|
+
Values used as placeholders for missing data in legacy systems:
|
|
211
|
+
|
|
212
|
+
| Sentinel | Meaning |
|
|
213
|
+
|----------|---------|
|
|
214
|
+
| 999, 9999, 99999 | Missing / not recorded |
|
|
215
|
+
| -1, -9, -99 | Missing / not applicable |
|
|
216
|
+
| 0 | Could be true zero OR missing -- context-dependent |
|
|
217
|
+
| 88, 77 | "Not applicable" or "Refused" in survey data |
|
|
218
|
+
| 8888 | "Not applicable" or "Missing" in health screening/institutional databases |
|
|
219
|
+
| 01/01/1900 | Default/missing date |
|
|
220
|
+
|
|
221
|
+
**Action**: Replace sentinel values with `NaN` BEFORE computing any statistics.
|
|
222
|
+
Document which values were treated as sentinel.
|
|
223
|
+
|
|
224
|
+
### Excel Date Corruption
|
|
225
|
+
|
|
226
|
+
Excel auto-converts certain strings to dates:
|
|
227
|
+
- Gene names: SEPT1 -> 1-Sep, MARCH1 -> 1-Mar, DEC1 -> 1-Dec
|
|
228
|
+
- Sample IDs: 1-3 -> 3-Jan, 2/4 -> 4-Feb (US locale)
|
|
229
|
+
|
|
230
|
+
**Prevention**: Open CSV in a text editor first to verify. Import with explicit dtypes
|
|
231
|
+
in pandas: `pd.read_csv(path, dtype={'gene': str})`.
|
|
232
|
+
|
|
233
|
+
**Detection**: Look for datetime values in columns that should contain gene names or
|
|
234
|
+
sample identifiers.
|
|
235
|
+
|
|
236
|
+
### Numeric Precision
|
|
237
|
+
|
|
238
|
+
- Floating point: 0.1 + 0.2 != 0.3. Use `np.isclose()` for comparisons.
|
|
239
|
+
- Rounding: Be consistent. Define rounding rules before analysis.
|
|
240
|
+
- Integer overflow: Rare in Python, but watch for 32-bit integer limits in R or
|
|
241
|
+
database imports (max 2,147,483,647).
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## 7. Recommended Workflow
|
|
246
|
+
|
|
247
|
+
The recommended end-to-end data cleaning workflow:
|
|
248
|
+
|
|
249
|
+
1. **Profile**: Run the profiling script. Understand what you have.
|
|
250
|
+
2. **Flag**: Identify potential issues. Categorize by type and severity.
|
|
251
|
+
3. **Review**: Present flags to the domain expert (you, the researcher).
|
|
252
|
+
4. **Approve**: Decide which flags to act on. Document rationale for each decision.
|
|
253
|
+
5. **Clean**: Generate and run cleaning code for approved actions only.
|
|
254
|
+
6. **Verify**: Compare before/after summaries. Check that cleaning did not introduce
|
|
255
|
+
new problems.
|
|
256
|
+
7. **Document**: Save the cleaning log, mapping tables, and decision rationale.
|
|
257
|
+
Include in supplementary materials or methods section.
|
|
258
|
+
|
|
259
|
+
### Documentation Checklist
|
|
260
|
+
|
|
261
|
+
- [ ] Number of rows before and after cleaning
|
|
262
|
+
- [ ] Number and percentage of missing values per variable (before/after)
|
|
263
|
+
- [ ] Outlier handling decisions with justification
|
|
264
|
+
- [ ] Duplicate removal count
|
|
265
|
+
- [ ] Category mapping tables
|
|
266
|
+
- [ ] Imputation method and variables imputed
|
|
267
|
+
- [ ] Any variables excluded from analysis and why
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## 8. Key References
|
|
272
|
+
|
|
273
|
+
1. Van den Broeck J, Cunningham SA, Eeckels R, Herbst K. Data cleaning: detecting,
|
|
274
|
+
diagnosing, and editing data abnormalities. *PLoS Med*. 2005;2(10):e267.
|
|
275
|
+
DOI: 10.1371/journal.pmed.0020267
|
|
276
|
+
|
|
277
|
+
2. Kang H. The prevention and handling of the missing data.
|
|
278
|
+
*Korean J Anesthesiol*. 2013;64(5):402-406.
|
|
279
|
+
DOI: 10.4097/kjae.2013.64.5.402
|
|
280
|
+
|
|
281
|
+
3. Sterne JAC, White IR, Carlin JB, et al. Multiple imputation for missing data
|
|
282
|
+
in epidemiological and clinical research: potential and pitfalls.
|
|
283
|
+
*BMJ*. 2009;338:b2393.
|
|
284
|
+
DOI: 10.1136/bmj.b2393
|
|
285
|
+
|
|
286
|
+
4. Altman DG, Bland JM. Missing data. *BMJ*. 2007;334(7590):424.
|
|
287
|
+
DOI: 10.1136/bmj.38977.682025.2C
|
|
288
|
+
|
|
289
|
+
5. White IR, Royston P, Wood AM. Multiple imputation using chained equations:
|
|
290
|
+
Issues and guidance for practice. *Stat Med*. 2011;30(4):377-399.
|
|
291
|
+
DOI: 10.1002/sim.4067
|
|
292
|
+
|
|
293
|
+
6. Ziemann M, Eren Y, El-Osta A. Gene name errors are widespread in the
|
|
294
|
+
scientific literature. *Genome Biol*. 2016;17(1):177.
|
|
295
|
+
DOI: 10.1186/s13059-016-1044-7
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
*This reference is part of the clean-data skill for the medical-research-skills package.*
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Data Profiling Template for Clinical Research Datasets
|
|
4
|
+
======================================================
|
|
5
|
+
|
|
6
|
+
Generates a structured profile of a CSV or Excel dataset.
|
|
7
|
+
Outputs a summary table to the console and saves it as CSV.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python profiling_template.py <file_path> [--output <output_dir>]
|
|
11
|
+
|
|
12
|
+
Requirements:
|
|
13
|
+
- pandas
|
|
14
|
+
- numpy
|
|
15
|
+
- matplotlib, seaborn (optional, for plots)
|
|
16
|
+
|
|
17
|
+
This script does NOT modify the input data. It is read-only.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import os
|
|
22
|
+
import sys
|
|
23
|
+
import random
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
import pandas as pd
|
|
28
|
+
|
|
29
|
+
# Reproducibility: seed NumPy's global RNG and the stdlib `random` module when
# this module is loaded.
# NOTE(review): none of the functions in this script draw random numbers, so
# the seeds look precautionary -- confirm they are still wanted.
np.random.seed(42)
random.seed(42)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# 1. Data Loading
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
def load_data(file_path: str) -> pd.DataFrame:
    """Load a CSV, TSV, or Excel file into a DataFrame.

    The format is chosen from the file extension (case-insensitive).

    Args:
        file_path: Path to a ``.csv``, ``.tsv``, ``.xls``, ``.xlsx`` or
            ``.xlsm`` file.

    Returns:
        The loaded DataFrame. A one-line size report is printed as well.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    path = Path(file_path)
    ext = path.suffix.lower()

    if ext in (".csv", ".tsv"):
        sep = "\t" if ext == ".tsv" else ","
        # low_memory=False reads the file in one pass so that each column gets
        # a single dtype instead of chunk-dependent mixed types.
        df = pd.read_csv(path, sep=sep, low_memory=False)
    elif ext in (".xlsx", ".xlsm"):
        df = pd.read_excel(path, engine="openpyxl")
    elif ext == ".xls":
        # openpyxl only reads the XML-based formats; legacy binary .xls
        # workbooks need pandas' default engine selection (xlrd).
        df = pd.read_excel(path)
    else:
        raise ValueError(f"Unsupported file format: {ext}. Use CSV, TSV, or Excel.")

    print(f"Loaded {len(df)} rows x {len(df.columns)} columns from {path.name}")
    return df
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# 2. Variable Summary
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
def build_variable_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Build a per-variable summary with type, missingness, and descriptive stats.

    Args:
        df: Dataset to profile. It is only read, never modified.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        ``variable``, ``dtype``, ``inferred_type``, ``n_total``, ``n_missing``,
        ``pct_missing``, ``n_unique``, ``min``, ``max``, ``mean``, ``median``
        and ``sd``. The five statistics are rounded to 4 decimals and filled
        only for variables inferred as ``"numeric"`` that have at least one
        parseable value. When ``df`` has no columns the result is empty but
        still carries every summary column, so downstream column lookups work.
    """
    summary_columns = [
        "variable", "dtype", "inferred_type", "n_total", "n_missing",
        "pct_missing", "n_unique", "min", "max", "mean", "median", "sd",
    ]
    n_total = len(df)
    records = []

    for col in df.columns:
        series = df[col]
        n_missing = int(series.isna().sum())
        inferred_type = _infer_variable_type(series)

        rec = {
            "variable": col,
            "dtype": str(series.dtype),
            "inferred_type": inferred_type,
            "n_total": n_total,
            "n_missing": n_missing,
            # Guard against division by zero for a dataset without rows.
            "pct_missing": round(100 * n_missing / n_total, 2) if n_total > 0 else 0.0,
            "n_unique": int(series.nunique(dropna=True)),
            "min": None,
            "max": None,
            "mean": None,
            "median": None,
            "sd": None,
        }

        # Numeric descriptive statistics
        if inferred_type == "numeric":
            # Coercion turns unparseable entries of numeric-as-text columns
            # into NaN so they are ignored by the statistics below.
            numeric = pd.to_numeric(series, errors="coerce")
            if numeric.notna().any():
                rec["min"] = round(float(numeric.min()), 4)
                rec["max"] = round(float(numeric.max()), 4)
                rec["mean"] = round(float(numeric.mean()), 4)
                rec["median"] = round(float(numeric.median()), 4)
                rec["sd"] = round(float(numeric.std()), 4)

        records.append(rec)

    # Passing the column list keeps the schema stable even with no records.
    return pd.DataFrame(records, columns=summary_columns)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _infer_variable_type(series: pd.Series) -> str:
|
|
101
|
+
"""Heuristic type inference: numeric, categorical, datetime, or text."""
|
|
102
|
+
if pd.api.types.is_numeric_dtype(series):
|
|
103
|
+
return "numeric"
|
|
104
|
+
if pd.api.types.is_datetime64_any_dtype(series):
|
|
105
|
+
return "datetime"
|
|
106
|
+
|
|
107
|
+
# Try to parse as numeric (catches numeric-stored-as-string)
|
|
108
|
+
coerced = pd.to_numeric(series.dropna(), errors="coerce")
|
|
109
|
+
if coerced.notna().sum() > 0.8 * series.dropna().shape[0]:
|
|
110
|
+
return "numeric"
|
|
111
|
+
|
|
112
|
+
# Try to parse as datetime
|
|
113
|
+
try:
|
|
114
|
+
parsed = pd.to_datetime(series.dropna(), infer_datetime_format=True, errors="coerce")
|
|
115
|
+
if parsed.notna().sum() > 0.8 * series.dropna().shape[0]:
|
|
116
|
+
return "datetime"
|
|
117
|
+
except Exception:
|
|
118
|
+
pass
|
|
119
|
+
|
|
120
|
+
# Categorical vs free text heuristic
|
|
121
|
+
n_unique = series.nunique(dropna=True)
|
|
122
|
+
n_rows = len(series.dropna())
|
|
123
|
+
if n_rows > 0 and n_unique / n_rows < 0.05:
|
|
124
|
+
return "categorical"
|
|
125
|
+
if n_unique <= 20:
|
|
126
|
+
return "categorical"
|
|
127
|
+
|
|
128
|
+
return "text"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
# 3. Flag Detection
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
def flag_missing(summary: pd.DataFrame, threshold: float = 5.0) -> pd.DataFrame:
    """Report the variables whose missing percentage exceeds ``threshold``.

    Args:
        summary: Variable summary with at least the columns ``variable``,
            ``n_missing`` and ``pct_missing``.
        threshold: Missing percentage a variable must exceed to be flagged.

    Returns:
        The flagged rows with the columns ``variable``, ``issue``,
        ``n_missing``, ``pct_missing`` and ``severity``. Severity is
        ``"High"`` above 30%, ``"Medium"`` above 10% and ``"Low"`` otherwise.
    """

    def _grade(pct):
        # Cut-offs are strict: exactly 30 is "Medium", exactly 10 is "Low".
        if pct > 30:
            return "High"
        if pct > 10:
            return "Medium"
        return "Low"

    over_limit = summary["pct_missing"] > threshold
    # Copy so the two new columns never touch the caller's summary.
    report = summary[over_limit].copy()
    report["issue"] = f"Missing > {threshold}%"
    report["severity"] = report["pct_missing"].apply(_grade)

    wanted = ["variable", "issue", "n_missing", "pct_missing", "severity"]
    return report[wanted]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def flag_outliers_iqr(
    df: pd.DataFrame,
    summary: pd.DataFrame,
    *,
    iqr_multiplier: float = 1.5,
    min_count: int = 10,
) -> pd.DataFrame:
    """Flag numeric variables with outliers using the IQR method.

    A value is an outlier when it lies outside
    ``[Q1 - iqr_multiplier * IQR, Q3 + iqr_multiplier * IQR]``.

    Args:
        df: The dataset being profiled.
        summary: Variable summary with ``variable`` and ``inferred_type``
            columns; only rows typed ``"numeric"`` are examined.
        iqr_multiplier: Width of the fences in IQR units (default 1.5).
        min_count: Variables with fewer parseable values than this are
            skipped (default 10).

    Returns:
        One row per variable that has at least one outlier, with the columns
        ``variable``, ``issue``, ``count`` and ``severity`` (always
        ``"Medium"``). Empty, with the same columns, when nothing is flagged.
    """
    results = []
    numeric_vars = summary.loc[summary["inferred_type"] == "numeric", "variable"].tolist()

    for col in numeric_vars:
        # Cast to float after dropping NaN: boolean columns count as numeric,
        # and quantile arithmetic on a boolean Series can raise TypeError.
        numeric = pd.to_numeric(df[col], errors="coerce").dropna().astype(float)
        if len(numeric) < min_count:
            continue

        q1 = numeric.quantile(0.25)
        q3 = numeric.quantile(0.75)
        iqr = q3 - q1
        if iqr == 0:
            # Zero spread would flag every value that differs from the mode.
            continue

        lower = q1 - iqr_multiplier * iqr
        upper = q3 + iqr_multiplier * iqr
        outliers = numeric[(numeric < lower) | (numeric > upper)]
        if len(outliers) > 0:
            results.append({
                "variable": col,
                "issue": f"Outlier (IQR): {len(outliers)} values outside [{lower:.2f}, {upper:.2f}]",
                "count": len(outliers),
                "severity": "Medium",
            })

    return pd.DataFrame(results, columns=["variable", "issue", "count", "severity"])
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
# 4. Distribution Plots (optional)
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
def plot_distributions(df: pd.DataFrame, summary: pd.DataFrame, output_dir: str):
|
|
180
|
+
"""Generate histograms for numeric and bar charts for categorical variables."""
|
|
181
|
+
try:
|
|
182
|
+
import matplotlib
|
|
183
|
+
matplotlib.use("Agg")
|
|
184
|
+
import matplotlib.pyplot as plt
|
|
185
|
+
import seaborn as sns
|
|
186
|
+
except ImportError:
|
|
187
|
+
print("[INFO] matplotlib/seaborn not installed. Skipping distribution plots.")
|
|
188
|
+
return
|
|
189
|
+
|
|
190
|
+
plot_dir = os.path.join(output_dir, "profile_plots")
|
|
191
|
+
os.makedirs(plot_dir, exist_ok=True)
|
|
192
|
+
|
|
193
|
+
# Numeric histograms
|
|
194
|
+
numeric_vars = summary[summary["inferred_type"] == "numeric"]["variable"].tolist()
|
|
195
|
+
for col in numeric_vars[:20]: # Limit to first 20 to avoid excessive plots
|
|
196
|
+
fig, ax = plt.subplots(figsize=(6, 4))
|
|
197
|
+
numeric = pd.to_numeric(df[col], errors="coerce").dropna()
|
|
198
|
+
if len(numeric) == 0:
|
|
199
|
+
plt.close(fig)
|
|
200
|
+
continue
|
|
201
|
+
ax.hist(numeric, bins=30, edgecolor="black", alpha=0.7)
|
|
202
|
+
ax.set_title(f"Distribution: {col}")
|
|
203
|
+
ax.set_xlabel(col)
|
|
204
|
+
ax.set_ylabel("Frequency")
|
|
205
|
+
fig.tight_layout()
|
|
206
|
+
fig.savefig(os.path.join(plot_dir, f"hist_{col}.png"), dpi=100)
|
|
207
|
+
plt.close(fig)
|
|
208
|
+
|
|
209
|
+
# Categorical bar charts
|
|
210
|
+
cat_vars = summary[summary["inferred_type"] == "categorical"]["variable"].tolist()
|
|
211
|
+
for col in cat_vars[:20]:
|
|
212
|
+
fig, ax = plt.subplots(figsize=(6, 4))
|
|
213
|
+
counts = df[col].value_counts().head(15)
|
|
214
|
+
counts.plot(kind="barh", ax=ax, color="steelblue", edgecolor="black")
|
|
215
|
+
ax.set_title(f"Categories: {col}")
|
|
216
|
+
ax.set_xlabel("Count")
|
|
217
|
+
fig.tight_layout()
|
|
218
|
+
fig.savefig(os.path.join(plot_dir, f"bar_{col}.png"), dpi=100)
|
|
219
|
+
plt.close(fig)
|
|
220
|
+
|
|
221
|
+
print(f"[INFO] Distribution plots saved to {plot_dir}/")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# ---------------------------------------------------------------------------
|
|
225
|
+
# 5. Report Output
|
|
226
|
+
# ---------------------------------------------------------------------------
|
|
227
|
+
|
|
228
|
+
def print_summary(summary: pd.DataFrame):
    """Write the variable summary to stdout as a fixed-width table.

    The table is framed by 80-character rules and shows the type,
    missingness, cardinality and descriptive-statistic columns only.

    Args:
        summary: Variable summary containing every displayed column.
    """
    rule = "=" * 80
    shown = (
        "variable", "inferred_type", "n_missing", "pct_missing",
        "n_unique", "min", "max", "mean", "median", "sd",
    )

    # Banner first, so it is already printed if the column lookup fails.
    for banner_line in ("\n" + rule, "VARIABLE SUMMARY", rule):
        print(banner_line)

    table = summary[list(shown)]
    print(table.to_string(index=False))
    print(rule)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def save_outputs(summary: pd.DataFrame, flags_missing: pd.DataFrame,
                 flags_outlier: pd.DataFrame, output_dir: str):
    """Write the profiling tables to CSV files inside ``output_dir``.

    ``variable_summary.csv`` is always written. ``flags_missing.csv`` and
    ``flags_outliers.csv`` are written only when the corresponding table has
    at least one row. Each written file is announced on stdout.

    Args:
        summary: Per-variable summary table.
        flags_missing: Missing-data flags; may be empty.
        flags_outlier: Outlier flags; may be empty.
        output_dir: Target directory, created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    summary_path = os.path.join(output_dir, "variable_summary.csv")
    summary.to_csv(summary_path, index=False)
    print(f"[SAVED] Variable summary -> {summary_path}")

    # (table, file name, label used in the console message)
    optional_tables = (
        (flags_missing, "flags_missing.csv", "Missing flags"),
        (flags_outlier, "flags_outliers.csv", "Outlier flags"),
    )
    for table, file_name, label in optional_tables:
        if len(table) == 0:
            continue
        target = os.path.join(output_dir, file_name)
        table.to_csv(target, index=False)
        print(f"[SAVED] {label} -> {target}")
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
# Main
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
264
|
+
|
|
265
|
+
def main():
    """Command-line entry point: load, profile, flag, save, and optionally plot.

    Arguments are read from ``sys.argv``: the input file path, ``--output``
    for the result directory (default ``./data_profile``) and ``--no-plots``
    to skip the distribution plots.
    """
    cli = argparse.ArgumentParser(description="Profile a clinical research dataset.")
    cli.add_argument("file_path", help="Path to CSV or Excel file")
    cli.add_argument(
        "--output",
        default="./data_profile",
        help="Output directory for profiling results (default: ./data_profile)",
    )
    cli.add_argument("--no-plots", action="store_true", help="Skip distribution plots")
    options = cli.parse_args()

    # Load
    data = load_data(options.file_path)

    # Profile
    profile = build_variable_summary(data)
    print_summary(profile)

    # Flag
    missing_flags = flag_missing(profile, threshold=5.0)
    outlier_flags = flag_outliers_iqr(data, profile)

    # Each flag table is echoed only when it has at least one row.
    flag_reports = (
        ("\n[FLAGS] Variables with >5% missing:", missing_flags),
        ("\n[FLAGS] Variables with IQR outliers:", outlier_flags),
    )
    for heading, table in flag_reports:
        if len(table) > 0:
            print(heading)
            print(table.to_string(index=False))

    # Save
    save_outputs(profile, missing_flags, outlier_flags, options.output)

    # Plot (optional)
    if not options.no_plots:
        plot_distributions(data, profile, options.output)

    print("\n[DONE] Profiling complete. Review outputs before proceeding to cleaning.")
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# Run the profiler only when this file is executed as a script; importing the
# module does not start a profiling run.
if __name__ == "__main__":
    main()
|