medsci-skills 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +50 -0
- package/README.md +602 -0
- package/README_FIRST.md +27 -0
- package/bin/medsci-skills.js +159 -0
- package/installers/install-macos.command +19 -0
- package/installers/install-windows.cmd +26 -0
- package/installers/install-windows.ps1 +17 -0
- package/installers/install.py +218 -0
- package/metadata/skills_catalog.json +452 -0
- package/package.json +48 -0
- package/skills/academic-aio/SKILL.md +408 -0
- package/skills/academic-aio/references/case_studies/kjr_mllm_2025.md +82 -0
- package/skills/academic-aio/references/checklists/AIO_GENERAL.md +354 -0
- package/skills/academic-aio/references/journal_summarybox_templates.yaml +126 -0
- package/skills/academic-aio/references/oac_funding_checklist.yaml +129 -0
- package/skills/academic-aio/references/reporting_guideline_mapping.md +39 -0
- package/skills/academic-aio/references/schema_markup_templates/CodeRepository.jsonld +32 -0
- package/skills/academic-aio/references/schema_markup_templates/Dataset.jsonld +36 -0
- package/skills/academic-aio/references/schema_markup_templates/Person.jsonld +30 -0
- package/skills/academic-aio/references/schema_markup_templates/README.md +43 -0
- package/skills/academic-aio/references/schema_markup_templates/ScholarlyArticle.jsonld +55 -0
- package/skills/academic-aio/scripts/batch_metadata_audit.py +169 -0
- package/skills/academic-aio/scripts/validate_schema.py +118 -0
- package/skills/academic-aio/skill.yml +36 -0
- package/skills/academic-aio/templates/aio_audit_checklist.md.j2 +108 -0
- package/skills/add-journal/SKILL.md +482 -0
- package/skills/add-journal/skill.yml +33 -0
- package/skills/analyze-stats/SKILL.md +598 -0
- package/skills/analyze-stats/references/analysis_guides/missing_data.md +109 -0
- package/skills/analyze-stats/references/analysis_guides/nhis_icd10_mapping.md +247 -0
- package/skills/analyze-stats/references/analysis_guides/propensity_score.md +132 -0
- package/skills/analyze-stats/references/analysis_guides/regression.md +115 -0
- package/skills/analyze-stats/references/analysis_guides/repeated_measures.md +160 -0
- package/skills/analyze-stats/references/analysis_guides/survey_weighted.md +366 -0
- package/skills/analyze-stats/references/analysis_guides/test_selection.md +86 -0
- package/skills/analyze-stats/references/style/figure_style.mplstyle +69 -0
- package/skills/analyze-stats/references/style/theme_publication.R +147 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/ajr.yaml +51 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/european_radiology.yaml +55 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/jama.yaml +66 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/lancet.yaml +57 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/nejm.yaml +51 -0
- package/skills/analyze-stats/references/table-standards/journal-profiles/radiology.yaml +66 -0
- package/skills/analyze-stats/references/table-standards/table-standards.md +287 -0
- package/skills/analyze-stats/references/table-standards/table-types/diagnostic_accuracy.md +36 -0
- package/skills/analyze-stats/references/table-standards/table-types/meta_analysis.md +58 -0
- package/skills/analyze-stats/references/table-standards/table-types/model_comparison.md +36 -0
- package/skills/analyze-stats/references/table-standards/table-types/regression_results.md +50 -0
- package/skills/analyze-stats/references/table-standards/table-types/table1_demographics.md +51 -0
- package/skills/analyze-stats/references/table-standards/tool-comparison.md +79 -0
- package/skills/analyze-stats/references/templates/agreement_analysis.py +436 -0
- package/skills/analyze-stats/references/templates/dca_plot.R +237 -0
- package/skills/analyze-stats/references/templates/diagnostic_accuracy.py +401 -0
- package/skills/analyze-stats/references/templates/dta_meta_analysis.R +384 -0
- package/skills/analyze-stats/references/templates/forest_plot.py +412 -0
- package/skills/analyze-stats/references/templates/likert_summary.py +356 -0
- package/skills/analyze-stats/references/templates/meta_analysis.R +365 -0
- package/skills/analyze-stats/references/templates/propensity_score.py +478 -0
- package/skills/analyze-stats/references/templates/regression.py +425 -0
- package/skills/analyze-stats/references/templates/repeated_measures.py +434 -0
- package/skills/analyze-stats/references/templates/sample_size.R +382 -0
- package/skills/analyze-stats/references/templates/survey_weighted_analysis.py +411 -0
- package/skills/analyze-stats/references/templates/survival_analysis.py +325 -0
- package/skills/analyze-stats/references/templates/table1_demographics.py +287 -0
- package/skills/analyze-stats/scripts/check_generated_code.py +335 -0
- package/skills/analyze-stats/skill.yml +38 -0
- package/skills/analyze-stats/tests/fixtures/gen_bad.R +16 -0
- package/skills/analyze-stats/tests/fixtures/gen_bad.py +24 -0
- package/skills/analyze-stats/tests/fixtures/gen_clean.py +21 -0
- package/skills/analyze-stats/tests/test_generated_code.sh +59 -0
- package/skills/analyze-stats/tests/test_survival_template.sh +53 -0
- package/skills/author-strategy/SKILL.md +117 -0
- package/skills/author-strategy/analyze_patterns.py +303 -0
- package/skills/author-strategy/fetch_pubmed.py +374 -0
- package/skills/author-strategy/skill.yml +34 -0
- package/skills/batch-cohort/SKILL.md +223 -0
- package/skills/batch-cohort/references/base_template_knhanes.R +210 -0
- package/skills/batch-cohort/references/batch_template_generator.R +222 -0
- package/skills/batch-cohort/references/variable_coding_registry.md +136 -0
- package/skills/batch-cohort/skill.yml +35 -0
- package/skills/calc-sample-size/SKILL.md +491 -0
- package/skills/calc-sample-size/references/formulas.md +655 -0
- package/skills/calc-sample-size/references/observational_cohort.md +49 -0
- package/skills/calc-sample-size/skill.yml +51 -0
- package/skills/check-reporting/SKILL.md +534 -0
- package/skills/check-reporting/references/LICENSES.md +41 -0
- package/skills/check-reporting/references/checklists/AMSTAR2.md +54 -0
- package/skills/check-reporting/references/checklists/ARRIVE_2.md +234 -0
- package/skills/check-reporting/references/checklists/CARE.md +102 -0
- package/skills/check-reporting/references/checklists/CLAIM_2024.md +128 -0
- package/skills/check-reporting/references/checklists/CLEAR.md +113 -0
- package/skills/check-reporting/references/checklists/CONSORT.md +86 -0
- package/skills/check-reporting/references/checklists/COSMIN_RoB.md +136 -0
- package/skills/check-reporting/references/checklists/GRRAS.md +61 -0
- package/skills/check-reporting/references/checklists/MI_CLEAR_LLM.md +167 -0
- package/skills/check-reporting/references/checklists/MOOSE.md +85 -0
- package/skills/check-reporting/references/checklists/NOS.md +88 -0
- package/skills/check-reporting/references/checklists/PRISMA_2020.md +135 -0
- package/skills/check-reporting/references/checklists/PRISMA_DTA.md +36 -0
- package/skills/check-reporting/references/checklists/PRISMA_P.md +56 -0
- package/skills/check-reporting/references/checklists/PROBAST.md +75 -0
- package/skills/check-reporting/references/checklists/PROBAST_AI.md +130 -0
- package/skills/check-reporting/references/checklists/QUADAS2.md +77 -0
- package/skills/check-reporting/references/checklists/QUADAS_C.md +131 -0
- package/skills/check-reporting/references/checklists/ROBINS_E.md +179 -0
- package/skills/check-reporting/references/checklists/ROBINS_I.md +87 -0
- package/skills/check-reporting/references/checklists/ROBIS.md +114 -0
- package/skills/check-reporting/references/checklists/ROB_ME.md +126 -0
- package/skills/check-reporting/references/checklists/RoB2.md +79 -0
- package/skills/check-reporting/references/checklists/RoB_NMA.md +96 -0
- package/skills/check-reporting/references/checklists/SPIRIT.md +112 -0
- package/skills/check-reporting/references/checklists/SQUIRE_2.md +68 -0
- package/skills/check-reporting/references/checklists/STARD.md +129 -0
- package/skills/check-reporting/references/checklists/STARD_AI.md +211 -0
- package/skills/check-reporting/references/checklists/STROBE.md +80 -0
- package/skills/check-reporting/references/checklists/SWiM.md +33 -0
- package/skills/check-reporting/references/checklists/TRIPOD.md +157 -0
- package/skills/check-reporting/references/checklists/TRIPOD_AI.md +140 -0
- package/skills/check-reporting/references/step4c_registration_timing.md +93 -0
- package/skills/check-reporting/references/step4d_prisma_figure_audit.md +137 -0
- package/skills/check-reporting/scripts/check_checklist_exists.py +183 -0
- package/skills/check-reporting/scripts/check_checklist_version.py +168 -0
- package/skills/check-reporting/scripts/check_framework_naming.py +206 -0
- package/skills/check-reporting/scripts/check_prisma_figure.py +209 -0
- package/skills/check-reporting/scripts/prisma_cascade_check.py +274 -0
- package/skills/check-reporting/skill.yml +41 -0
- package/skills/check-reporting/tests/fixtures/framework_bad.md +8 -0
- package/skills/check-reporting/tests/fixtures/framework_clean.md +7 -0
- package/skills/check-reporting/tests/test_checklist_fail_fast.sh +77 -0
- package/skills/check-reporting/tests/test_checklist_version.sh +72 -0
- package/skills/check-reporting/tests/test_framework_naming.sh +45 -0
- package/skills/check-reporting/tests/test_prisma_cascade.sh +104 -0
- package/skills/clean-data/SKILL.md +180 -0
- package/skills/clean-data/references/cleaning_patterns.md +299 -0
- package/skills/clean-data/references/profiling_template.py +304 -0
- package/skills/clean-data/scripts/check_structural_zero.py +174 -0
- package/skills/clean-data/skill.yml +35 -0
- package/skills/clean-data/tests/fixtures/smoking.csv +8 -0
- package/skills/clean-data/tests/test_structural_zero.sh +49 -0
- package/skills/cross-national/SKILL.md +264 -0
- package/skills/cross-national/skill.yml +37 -0
- package/skills/define-variables/SKILL.md +146 -0
- package/skills/define-variables/references/common_definitions.md +190 -0
- package/skills/define-variables/skill.yml +34 -0
- package/skills/define-variables/templates/variable_operationalization.md +64 -0
- package/skills/deidentify/SKILL.md +203 -0
- package/skills/deidentify/deidentify.py +1224 -0
- package/skills/deidentify/locales/_template.json +45 -0
- package/skills/deidentify/locales/au.json +43 -0
- package/skills/deidentify/locales/ca.json +44 -0
- package/skills/deidentify/locales/cn.json +47 -0
- package/skills/deidentify/locales/de.json +48 -0
- package/skills/deidentify/locales/fr.json +48 -0
- package/skills/deidentify/locales/in.json +48 -0
- package/skills/deidentify/locales/jp.json +48 -0
- package/skills/deidentify/locales/kr.json +48 -0
- package/skills/deidentify/locales/uk.json +45 -0
- package/skills/deidentify/locales/us.json +43 -0
- package/skills/deidentify/references/date_shift_guide.md +82 -0
- package/skills/deidentify/references/hipaa_18_identifiers.md +48 -0
- package/skills/deidentify/references/korean_phi_patterns.md +135 -0
- package/skills/deidentify/skill.yml +43 -0
- package/skills/deidentify/tests/README.md +26 -0
- package/skills/deidentify/tests/test_clean.csv +16 -0
- package/skills/deidentify/tests/test_edge_cases.csv +11 -0
- package/skills/deidentify/tests/test_phi_korean.csv +11 -0
- package/skills/design-ai-benchmarking/SKILL.md +214 -0
- package/skills/design-ai-benchmarking/references/benchmark_export_schema.json +69 -0
- package/skills/design-ai-benchmarking/references/elicitation_rubric_template.md +37 -0
- package/skills/design-ai-benchmarking/skill.yml +38 -0
- package/skills/design-study/SKILL.md +298 -0
- package/skills/design-study/skill.yml +33 -0
- package/skills/fill-icmje-coi/SKILL.md +216 -0
- package/skills/fill-icmje-coi/scripts/fill_icmje_coi.py +140 -0
- package/skills/fill-icmje-coi/skill.yml +35 -0
- package/skills/fill-icmje-coi/templates/icmje_coi_seed_synthetic.docx +0 -0
- package/skills/fill-protocol/SKILL.md +248 -0
- package/skills/fill-protocol/examples/example_irb_template.yaml +53 -0
- package/skills/fill-protocol/references/best_practices.md +121 -0
- package/skills/fill-protocol/scripts/doc_to_docx.py +111 -0
- package/skills/fill-protocol/scripts/fill_form.py +611 -0
- package/skills/fill-protocol/scripts/inspect_template.py +61 -0
- package/skills/fill-protocol/setup.sh +162 -0
- package/skills/fill-protocol/skill.yml +37 -0
- package/skills/find-cohort-gap/SKILL.md +309 -0
- package/skills/find-cohort-gap/references/cohort_profile_template.md +93 -0
- package/skills/find-cohort-gap/references/onepager_template.md +84 -0
- package/skills/find-cohort-gap/references/pattern_scoring_rubric.md +169 -0
- package/skills/find-cohort-gap/references/saturation_query_templates.md +143 -0
- package/skills/find-cohort-gap/skill.yml +35 -0
- package/skills/find-journal/POLICY.md +87 -0
- package/skills/find-journal/SKILL.md +340 -0
- package/skills/find-journal/references/journal_profiles/AJNR.md +29 -0
- package/skills/find-journal/references/journal_profiles/AJR.md +30 -0
- package/skills/find-journal/references/journal_profiles/Abdominal_Radiology.md +30 -0
- package/skills/find-journal/references/journal_profiles/Academic_Radiology.md +30 -0
- package/skills/find-journal/references/journal_profiles/Annals_of_Internal_Medicine.md +33 -0
- package/skills/find-journal/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +28 -0
- package/skills/find-journal/references/journal_profiles/BMC_Medicine.md +31 -0
- package/skills/find-journal/references/journal_profiles/British_Journal_of_Radiology.md +39 -0
- package/skills/find-journal/references/journal_profiles/CVIR.md +30 -0
- package/skills/find-journal/references/journal_profiles/Chest.md +39 -0
- package/skills/find-journal/references/journal_profiles/Clinical_Radiology.md +30 -0
- package/skills/find-journal/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +32 -0
- package/skills/find-journal/references/journal_profiles/Diabetes_Metabolism_Journal.md +36 -0
- package/skills/find-journal/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +32 -0
- package/skills/find-journal/references/journal_profiles/Endocrinology_and_Metabolism.md +37 -0
- package/skills/find-journal/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +39 -0
- package/skills/find-journal/references/journal_profiles/European_Radiology.md +29 -0
- package/skills/find-journal/references/journal_profiles/Hepatology_Communications.md +40 -0
- package/skills/find-journal/references/journal_profiles/Hepatology_International.md +37 -0
- package/skills/find-journal/references/journal_profiles/IEEE_JBHI.md +28 -0
- package/skills/find-journal/references/journal_profiles/IEEE_TMI.md +28 -0
- package/skills/find-journal/references/journal_profiles/INSI.md +29 -0
- package/skills/find-journal/references/journal_profiles/Investigative_Radiology.md +25 -0
- package/skills/find-journal/references/journal_profiles/JACC_Advances.md +41 -0
- package/skills/find-journal/references/journal_profiles/JACC_Asia.md +30 -0
- package/skills/find-journal/references/journal_profiles/JACR.md +28 -0
- package/skills/find-journal/references/journal_profiles/JAMA.md +40 -0
- package/skills/find-journal/references/journal_profiles/JAMA_Network_Open.md +30 -0
- package/skills/find-journal/references/journal_profiles/JCSM.md +39 -0
- package/skills/find-journal/references/journal_profiles/JKMS.md +32 -0
- package/skills/find-journal/references/journal_profiles/JMIR.md +29 -0
- package/skills/find-journal/references/journal_profiles/JMIR_Medical_Education.md +29 -0
- package/skills/find-journal/references/journal_profiles/JNIS.md +35 -0
- package/skills/find-journal/references/journal_profiles/JVIR.md +31 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Biomedical_Informatics.md +29 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +40 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Magnetic_Resonance_Imaging.md +30 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Nuclear_Medicine.md +31 -0
- package/skills/find-journal/references/journal_profiles/Journal_of_Stroke.md +32 -0
- package/skills/find-journal/references/journal_profiles/KJR.md +38 -0
- package/skills/find-journal/references/journal_profiles/Korean_Circulation_Journal.md +38 -0
- package/skills/find-journal/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +36 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Diabetes_and_Endocrinology.md +40 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +49 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Infectious_Diseases.md +38 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Neurology.md +39 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Oncology.md +40 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Psychiatry.md +38 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Public_Health.md +30 -0
- package/skills/find-journal/references/journal_profiles/Lancet_Respiratory_Medicine.md +39 -0
- package/skills/find-journal/references/journal_profiles/Liver_International.md +33 -0
- package/skills/find-journal/references/journal_profiles/Medical_Image_Analysis.md +28 -0
- package/skills/find-journal/references/journal_profiles/NEJM.md +33 -0
- package/skills/find-journal/references/journal_profiles/Nature_Machine_Intelligence.md +31 -0
- package/skills/find-journal/references/journal_profiles/Nature_Medicine.md +39 -0
- package/skills/find-journal/references/journal_profiles/Neuroradiology.md +31 -0
- package/skills/find-journal/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +39 -0
- package/skills/find-journal/references/journal_profiles/PLOS_Medicine.md +32 -0
- package/skills/find-journal/references/journal_profiles/RYAI.md +28 -0
- package/skills/find-journal/references/journal_profiles/Radiology.md +29 -0
- package/skills/find-journal/references/journal_profiles/Skeletal_Radiology.md +31 -0
- package/skills/find-journal/references/journal_profiles/Stroke.md +37 -0
- package/skills/find-journal/references/journal_profiles/The_BMJ.md +31 -0
- package/skills/find-journal/references/journal_profiles/The_Lancet.md +31 -0
- package/skills/find-journal/references/journal_profiles/The_Lancet_Digital_Health.md +29 -0
- package/skills/find-journal/references/journal_profiles/World_Journal_of_Hepatology.md +53 -0
- package/skills/find-journal/references/journal_profiles/npj_Digital_Medicine.md +29 -0
- package/skills/find-journal/skill.yml +34 -0
- package/skills/fulltext-retrieval/SKILL.md +174 -0
- package/skills/fulltext-retrieval/fetch_oa.py +433 -0
- package/skills/fulltext-retrieval/pdf_to_md.py +160 -0
- package/skills/fulltext-retrieval/skill.yml +41 -0
- package/skills/generate-codebook/SKILL.md +155 -0
- package/skills/generate-codebook/references/codebook_schema.md +76 -0
- package/skills/generate-codebook/scripts/generate_codebook.py +278 -0
- package/skills/generate-codebook/skill.yml +35 -0
- package/skills/generate-codebook/tests/test_generate_codebook.sh +76 -0
- package/skills/grant-builder/SKILL.md +251 -0
- package/skills/grant-builder/skill.yml +34 -0
- package/skills/humanize/SKILL.md +251 -0
- package/skills/humanize/references/ai_patterns.md +571 -0
- package/skills/humanize/skill.yml +33 -0
- package/skills/intake-project/SKILL.md +264 -0
- package/skills/intake-project/skill.yml +34 -0
- package/skills/lit-sync/SKILL.md +448 -0
- package/skills/lit-sync/references/locale/ko/note_templates.md +110 -0
- package/skills/lit-sync/skill.yml +52 -0
- package/skills/lit-sync/tests/test_poll_logic.sh +92 -0
- package/skills/ma-scout/SKILL.md +640 -0
- package/skills/ma-scout/references/project_readme_template.md +95 -0
- package/skills/ma-scout/references/project_readme_template_ko.md +82 -0
- package/skills/ma-scout/skill.yml +33 -0
- package/skills/make-figures/SKILL.md +957 -0
- package/skills/make-figures/references/critic_rubrics/data_plot.md +166 -0
- package/skills/make-figures/references/critic_rubrics/flow_diagram.md +169 -0
- package/skills/make-figures/references/design_principles.md +181 -0
- package/skills/make-figures/references/exemplar_diagrams/README.md +65 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_input.yaml +37 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/consort/template_output_600.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/other/other_02.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/other/other_02.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/other/other_02_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_01_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_03_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_04_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_05_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_06_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_07_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_08_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_09_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.meta.yaml +4 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/pipeline/pipeline_10_why.md +13 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_input.yaml +47 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/prisma/template_output_600.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/README.md +15 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_input.yaml +40 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/stard/template_output_600.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_input.yaml +43 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_input_pptx.yaml +43 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pdf +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.png +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output.pptx +0 -0
- package/skills/make-figures/references/exemplar_diagrams/strobe/template_output_600.png +0 -0
- package/skills/make-figures/references/figure_specs.md +291 -0
- package/skills/make-figures/references/flow_diagram_lessons.md +164 -0
- package/skills/make-figures/references/jacc_central_illustration_principles.md +91 -0
- package/skills/make-figures/references/medical_illustration_sources.md +98 -0
- package/skills/make-figures/references/pipeline_concepts_medical_ai.md +240 -0
- package/skills/make-figures/references/reporting_guideline_figure_map.md +104 -0
- package/skills/make-figures/references/visual_abstract_templates/european_radiology.pptx +0 -0
- package/skills/make-figures/references/visual_abstract_templates/jacc_central_illustration.pptx +0 -0
- package/skills/make-figures/references/visual_abstract_templates/medsci_default.pptx +0 -0
- package/skills/make-figures/references/visual_abstract_templates/template_guide.md +114 -0
- package/skills/make-figures/scripts/build_jacc_template.py +77 -0
- package/skills/make-figures/scripts/build_prisma2020_template.py +371 -0
- package/skills/make-figures/scripts/build_strobe_template.py +351 -0
- package/skills/make-figures/scripts/critic_figure.py +264 -0
- package/skills/make-figures/scripts/derive_figure_legend_counts.py +138 -0
- package/skills/make-figures/scripts/extract_exemplar_from_pdf.py +186 -0
- package/skills/make-figures/scripts/fetch_official_templates.sh +88 -0
- package/skills/make-figures/scripts/fill_prisma_template.py +142 -0
- package/skills/make-figures/scripts/generate_flow_diagram.R +133 -0
- package/skills/make-figures/scripts/generate_image.py +99 -0
- package/skills/make-figures/scripts/generate_visual_abstract.py +438 -0
- package/skills/make-figures/scripts/validate_pptx_mac_compat.py +233 -0
- package/skills/make-figures/skill.yml +52 -0
- package/skills/make-figures/templates/official/NOTES.md +62 -0
- package/skills/make-figures/templates/official/consort2010/CONSORT_2025_editable_checklist.docx +0 -0
- package/skills/make-figures/templates/official/consort2010/CONSORT_2025_flow_diagram.docx +0 -0
- package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v1.pptx +0 -0
- package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_new_v2.pptx +0 -0
- package/skills/make-figures/templates/official/prisma2020/PRISMA_2020_flow_updated_v2.pptx +0 -0
- package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_editable_checklist.docx +0 -0
- package/skills/make-figures/templates/official/spirit2013/SPIRIT_2025_participant_timeline.docx +0 -0
- package/skills/make-figures/templates/official/stard2015/STARD_2015_checklist.docx +0 -0
- package/skills/make-figures/templates/official/stard2015/STARD_2015_flow_diagram.pdf +0 -0
- package/skills/make-figures/tests/fixtures/figure1_flow.yaml +8 -0
- package/skills/make-figures/tests/fixtures/manuscript_ok.md +9 -0
- package/skills/make-figures/tests/fixtures/manuscript_stale.md +4 -0
- package/skills/make-figures/tests/test_legend_reconcile.sh +36 -0
- package/skills/manage-project/SKILL.md +358 -0
- package/skills/manage-project/references/pre_submission_checklist.md +53 -0
- package/skills/manage-project/references/project_state_template.json +37 -0
- package/skills/manage-project/references/scaffold_templates.md +118 -0
- package/skills/manage-project/references/status_output_format.md +44 -0
- package/skills/manage-project/references/timeline_example.md +20 -0
- package/skills/manage-project/skill.yml +36 -0
- package/skills/manage-project/templates/SSOT.yaml.template +41 -0
- package/skills/manage-refs/LICENSE.zotero-mcp +21 -0
- package/skills/manage-refs/NOTICE.md +29 -0
- package/skills/manage-refs/SKILL.md +289 -0
- package/skills/manage-refs/citation_styles/README.md +40 -0
- package/skills/manage-refs/citation_styles/american-journal-of-roentgenology.csl +211 -0
- package/skills/manage-refs/citation_styles/cardiovascular-and-interventional-radiology.csl +19 -0
- package/skills/manage-refs/citation_styles/european-radiology.csl +19 -0
- package/skills/manage-refs/citation_styles/journal-of-cachexia-sarcopenia-and-muscle.csl +150 -0
- package/skills/manage-refs/citation_styles/journal-of-korean-medical-science-strict.csl +533 -0
- package/skills/manage-refs/citation_styles/journal-of-korean-medical-science.csl +16 -0
- package/skills/manage-refs/citation_styles/korean-journal-of-radiology.csl +155 -0
- package/skills/manage-refs/citation_styles/nature.csl +189 -0
- package/skills/manage-refs/citation_styles/nlm-citation-sequence.csl +535 -0
- package/skills/manage-refs/citation_styles/radiology.csl +228 -0
- package/skills/manage-refs/citation_styles/springer-basic-brackets.csl +187 -0
- package/skills/manage-refs/citation_styles/springer-vancouver-brackets.csl +276 -0
- package/skills/manage-refs/citation_styles/vancouver-superscript.csl +536 -0
- package/skills/manage-refs/citation_styles/vancouver.csl +535 -0
- package/skills/manage-refs/references/REFERENCE_STYLE_SPECS.md +59 -0
- package/skills/manage-refs/references/check_xref_symptoms.md +35 -0
- package/skills/manage-refs/scripts/_vendor_citation_writer.py +600 -0
- package/skills/manage-refs/scripts/check_citation_keys.py +112 -0
- package/skills/manage-refs/scripts/check_csl_render.py +102 -0
- package/skills/manage-refs/scripts/check_xref.py +633 -0
- package/skills/manage-refs/scripts/fill_journal_abbrev.py +104 -0
- package/skills/manage-refs/scripts/inject_zotero_cwyw.py +133 -0
- package/skills/manage-refs/scripts/md_marker_convert.py +193 -0
- package/skills/manage-refs/scripts/pre_submission_gate.sh +238 -0
- package/skills/manage-refs/scripts/render_pandoc.sh +88 -0
- package/skills/manage-refs/skill.yml +70 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/README.md +32 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/manuscript.md +10 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/refs.bib +34 -0
- package/skills/manage-refs/tests/fixtures/pre_submission_gate/run.sh +117 -0
- package/skills/manage-refs/tests/test_vN_docx_check.sh +145 -0
- package/skills/meta-analysis/SKILL.md +739 -0
- package/skills/meta-analysis/references/LICENSES.md +21 -0
- package/skills/meta-analysis/references/PROSPERO_template.md +221 -0
- package/skills/meta-analysis/references/ai_pre_screening_template.py +245 -0
- package/skills/meta-analysis/references/checklists/JBI_Case_Series.md +45 -0
- package/skills/meta-analysis/references/checklists/NOS.md +88 -0
- package/skills/meta-analysis/references/checklists/PRISMA_DTA.md +36 -0
- package/skills/meta-analysis/references/checklists/PROBAST.md +75 -0
- package/skills/meta-analysis/references/checklists/QUADAS2.md +77 -0
- package/skills/meta-analysis/references/checklists/ROBINS_I.md +87 -0
- package/skills/meta-analysis/references/checklists/RoB2.md +79 -0
- package/skills/meta-analysis/references/data_integrity_checklist.md +57 -0
- package/skills/meta-analysis/references/icmje_coi_guide.md +181 -0
- package/skills/meta-analysis/references/phase10_recovery.md +136 -0
- package/skills/meta-analysis/references/phase4_km_composite.md +58 -0
- package/skills/meta-analysis/references/phase6_statistical_synthesis.md +148 -0
- package/skills/meta-analysis/references/phase9_circulation.md +84 -0
- package/skills/meta-analysis/references/post_submission_release_ops.md +41 -0
- package/skills/meta-analysis/references/r_templates.md +132 -0
- package/skills/meta-analysis/references/review_orchestration.md +40 -0
- package/skills/meta-analysis/references/submission_package_drift.md +71 -0
- package/skills/meta-analysis/scripts/check_pool_consistency.py +201 -0
- package/skills/meta-analysis/scripts/cohort_overlap_check.py +242 -0
- package/skills/meta-analysis/scripts/dta_extraction_qc.py +137 -0
- package/skills/meta-analysis/scripts/screening_reconcile.py +160 -0
- package/skills/meta-analysis/skill.yml +47 -0
- package/skills/meta-analysis/templates/FINAL_POOL_LOCK.yaml.template +70 -0
- package/skills/meta-analysis/templates/extraction_form_v2.md +129 -0
- package/skills/meta-analysis/templates/supplementary_8file_checklist.md +94 -0
- package/skills/meta-analysis/tests/test_pool_consistency.sh +123 -0
- package/skills/orchestrate/SKILL.md +501 -0
- package/skills/orchestrate/references/dialogue_nodes.md +196 -0
- package/skills/orchestrate/references/report_template.md +109 -0
- package/skills/orchestrate/references/report_template_ko.md +88 -0
- package/skills/orchestrate/skill.yml +44 -0
- package/skills/peer-review/SKILL.md +381 -0
- package/skills/peer-review/references/aczel_2021_reviewer2_patterns.md +88 -0
- package/skills/peer-review/references/domain-probes/ai_overclaiming.md +47 -0
- package/skills/peer-review/references/domain-probes/narrative_review.md +44 -0
- package/skills/peer-review/references/domain-probes/observational_confounding.md +48 -0
- package/skills/peer-review/references/domain-probes/radiomics.md +38 -0
- package/skills/peer-review/references/domain-probes/sr_ma.md +87 -0
- package/skills/peer-review/references/domain-probes/survival_prognostic.md +68 -0
- package/skills/peer-review/references/exemplar_reviews/README.md +43 -0
- package/skills/peer-review/references/exemplar_reviews/ai_overclaiming.md +47 -0
- package/skills/peer-review/references/exemplar_reviews/calibration_missing.md +44 -0
- package/skills/peer-review/references/exemplar_reviews/data_leakage.md +48 -0
- package/skills/peer-review/references/exemplar_reviews/reference_standard_validity.md +45 -0
- package/skills/peer-review/references/narrative_review_audit.md +67 -0
- package/skills/peer-review/references/reviewer_calibration/README.md +34 -0
- package/skills/peer-review/references/reviewer_calibration/compliance_floor.md +52 -0
- package/skills/peer-review/references/reviewer_profiles/AJR.md +82 -0
- package/skills/peer-review/references/reviewer_profiles/EURE.md +64 -0
- package/skills/peer-review/references/reviewer_profiles/INSI.md +57 -0
- package/skills/peer-review/references/reviewer_profiles/KJR.md +100 -0
- package/skills/peer-review/references/reviewer_profiles/README.md +32 -0
- package/skills/peer-review/references/reviewer_profiles/RYAI.md +86 -0
- package/skills/peer-review/skill.yml +39 -0
- package/skills/present-paper/SKILL.md +675 -0
- package/skills/present-paper/references/critic_rubrics/slide.md +155 -0
- package/skills/present-paper/references/generate_pptx_templates.py +604 -0
- package/skills/present-paper/references/medical_presentation_templates.md +277 -0
- package/skills/present-paper/references/slide_design_principles.md +202 -0
- package/skills/present-paper/references/slide_visual_styles/nature_lancet.md +168 -0
- package/skills/present-paper/references/workflow-checklist.md +109 -0
- package/skills/present-paper/scripts/extract_pdf_figures.py +243 -0
- package/skills/present-paper/scripts/inject_pronunciation_notes.py +178 -0
- package/skills/present-paper/scripts/inject_speaker_notes.py +133 -0
- package/skills/present-paper/scripts/strip_notes_for_sharing.py +140 -0
- package/skills/present-paper/scripts/trim_caption.py +271 -0
- package/skills/present-paper/skill.yml +41 -0
- package/skills/present-paper/templates/build_pptx_nature_lancet.py +688 -0
- package/skills/publish-skill/SKILL.md +370 -0
- package/skills/publish-skill/references/license-compatibility-matrix.md +132 -0
- package/skills/publish-skill/references/pii-patterns.md +130 -0
- package/skills/publish-skill/scripts/audit_skill.sh +278 -0
- package/skills/publish-skill/skill.yml +35 -0
- package/skills/render-pdf-doc/SKILL.md +146 -0
- package/skills/render-pdf-doc/references/known_pitfalls.md +53 -0
- package/skills/render-pdf-doc/references/pandoc_korean_cheatsheet.md +77 -0
- package/skills/render-pdf-doc/scripts/check_deps.sh +42 -0
- package/skills/render-pdf-doc/scripts/infer_colwidths.py +164 -0
- package/skills/render-pdf-doc/scripts/render_pdf.sh +98 -0
- package/skills/render-pdf-doc/skill.yml +57 -0
- package/skills/render-pdf-doc/templates/anchor-doc.md +27 -0
- package/skills/render-pdf-doc/templates/anchor-doc_ko.md +25 -0
- package/skills/render-pdf-doc/templates/briefing-handout.md +33 -0
- package/skills/render-pdf-doc/templates/briefing-handout_ko.md +31 -0
- package/skills/render-pdf-doc/templates/proposal-cover.md +33 -0
- package/skills/render-pdf-doc/templates/proposal-cover_ko.md +31 -0
- package/skills/render-pdf-doc/templates/reference-table.md +22 -0
- package/skills/render-pdf-doc/templates/reference-table_ko.md +20 -0
- package/skills/replicate-study/SKILL.md +150 -0
- package/skills/replicate-study/references/harmonization_3country.csv +47 -0
- package/skills/replicate-study/references/harmonization_knhanes_nhanes.csv +68 -0
- package/skills/replicate-study/references/methodology_extraction_template.md +134 -0
- package/skills/replicate-study/skill.yml +37 -0
- package/skills/review-paper/SKILL.md +104 -0
- package/skills/review-paper/references/macro_skeleton.md +6 -0
- package/skills/review-paper/skill.yml +25 -0
- package/skills/revise/SKILL.md +515 -0
- package/skills/revise/references/r2r_voice.md +346 -0
- package/skills/revise/skill.yml +43 -0
- package/skills/search-lit/SKILL.md +443 -0
- package/skills/search-lit/references/parse_pubmed.py +326 -0
- package/skills/search-lit/references/pubmed_eutils.sh +111 -0
- package/skills/search-lit/skill.yml +46 -0
- package/skills/self-review/SKILL.md +1045 -0
- package/skills/self-review/references/domain-probes/ai_overclaiming.md +47 -0
- package/skills/self-review/references/domain-probes/narrative_review.md +44 -0
- package/skills/self-review/references/domain-probes/observational_confounding.md +48 -0
- package/skills/self-review/references/domain-probes/radiomics.md +38 -0
- package/skills/self-review/references/domain-probes/sr_ma.md +87 -0
- package/skills/self-review/references/domain-probes/survival_prognostic.md +68 -0
- package/skills/self-review/references/exemplar_findings/README.md +43 -0
- package/skills/self-review/references/exemplar_findings/cohort_arithmetic_mismatch.md +35 -0
- package/skills/self-review/references/exemplar_findings/estimand_drift_posthoc_primary.md +39 -0
- package/skills/self-review/references/exemplar_findings/scope_overreach_cross_sectional.md +35 -0
- package/skills/self-review/references/exemplar_findings/unadjusted_confounder.md +36 -0
- package/skills/self-review/references/panel_review_template.md +177 -0
- package/skills/self-review/scripts/check_artifact_coverage.py +301 -0
- package/skills/self-review/scripts/check_claim_artifact.py +248 -0
- package/skills/self-review/scripts/check_classical_style.py +185 -0
- package/skills/self-review/scripts/check_cohort_arithmetic.py +481 -0
- package/skills/self-review/scripts/check_confounding_completeness.py +287 -0
- package/skills/self-review/scripts/check_panel_diversity.py +336 -0
- package/skills/self-review/scripts/check_reference_adequacy.py +392 -0
- package/skills/self-review/scripts/check_reviewer_team_consistency.py +412 -0
- package/skills/self-review/scripts/check_scope_coherence.py +177 -0
- package/skills/self-review/skill.yml +47 -0
- package/skills/self-review/tests/fixtures/claim_manuscript.md +17 -0
- package/skills/self-review/tests/fixtures/claim_prereg.md +6 -0
- package/skills/self-review/tests/fixtures/cohort_bad.md +21 -0
- package/skills/self-review/tests/fixtures/cohort_clean.md +21 -0
- package/skills/self-review/tests/fixtures/cohort_partition.csv +5 -0
- package/skills/self-review/tests/fixtures/coverage_analysis/31_delong_nested_added_value.csv +3 -0
- package/skills/self-review/tests/fixtures/coverage_analysis/table1_demographics.csv +3 -0
- package/skills/self-review/tests/fixtures/coverage_clean.md +13 -0
- package/skills/self-review/tests/fixtures/coverage_manuscript.md +11 -0
- package/skills/self-review/tests/fixtures/panel_collapse.json +27 -0
- package/skills/self-review/tests/fixtures/panel_good.json +32 -0
- package/skills/self-review/tests/fixtures/panel_monoculture.json +32 -0
- package/skills/self-review/tests/fixtures/refadeq_letter.md +13 -0
- package/skills/self-review/tests/fixtures/refadeq_original_fixed.md +42 -0
- package/skills/self-review/tests/fixtures/refadeq_original_uncited.md +40 -0
- package/skills/self-review/tests/fixtures/scope_bad.md +9 -0
- package/skills/self-review/tests/fixtures/scope_clean.md +8 -0
- package/skills/self-review/tests/fixtures/scope_surrogate.md +8 -0
- package/skills/self-review/tests/fixtures/style_bad.md +13 -0
- package/skills/self-review/tests/fixtures/style_clean.md +11 -0
- package/skills/self-review/tests/fixtures/table1_by_exposure.csv +11 -0
- package/skills/self-review/tests/test_artifact_coverage.sh +44 -0
- package/skills/self-review/tests/test_claim_artifact.sh +50 -0
- package/skills/self-review/tests/test_classical_style.sh +44 -0
- package/skills/self-review/tests/test_cohort_arithmetic.sh +49 -0
- package/skills/self-review/tests/test_confounding_completeness.sh +66 -0
- package/skills/self-review/tests/test_panel_diversity.sh +55 -0
- package/skills/self-review/tests/test_panel_mode.sh +69 -0
- package/skills/self-review/tests/test_reference_adequacy.sh +68 -0
- package/skills/self-review/tests/test_reviewer_team_consistency.sh +138 -0
- package/skills/self-review/tests/test_scope_coherence.sh +46 -0
- package/skills/setup-medsci/SKILL.md +110 -0
- package/skills/setup-medsci/references/setup-checklist.md +51 -0
- package/skills/setup-medsci/skill.yml +30 -0
- package/skills/sync-submission/SKILL.md +382 -0
- package/skills/sync-submission/scripts/author_registry_example.yaml +36 -0
- package/skills/sync-submission/scripts/blind_sweep.py +203 -0
- package/skills/sync-submission/scripts/check_asset_anonymization.py +300 -0
- package/skills/sync-submission/scripts/check_cross_artifact_stale.py +211 -0
- package/skills/sync-submission/scripts/cover_letter_drift_check.py +451 -0
- package/skills/sync-submission/scripts/cross_document_n_check.py +486 -0
- package/skills/sync-submission/scripts/detect_copy_divergence.py +136 -0
- package/skills/sync-submission/scripts/preflight_gate.py +458 -0
- package/skills/sync-submission/scripts/scope_drift_check.py +362 -0
- package/skills/sync-submission/scripts/sync_submission.py +169 -0
- package/skills/sync-submission/skill.yml +43 -0
- package/skills/sync-submission/tests/fixtures/copy_ok.md +5 -0
- package/skills/sync-submission/tests/fixtures/copy_stale.md +5 -0
- package/skills/sync-submission/tests/fixtures/ssot.md +5 -0
- package/skills/sync-submission/tests/test_asset_anonymization.sh +99 -0
- package/skills/sync-submission/tests/test_copy_divergence.sh +44 -0
- package/skills/sync-submission/tests/test_cross_artifact_stale.sh +80 -0
- package/skills/sync-submission/tests/test_cross_document_n.sh +132 -0
- package/skills/sync-submission/tests/test_preflight_gate.sh +112 -0
- package/skills/sync-submission/tests/test_scope_drift.sh +122 -0
- package/skills/sync-submission/tests/test_vN_docx_assertion.sh +51 -0
- package/skills/verify-refs/SKILL.md +177 -0
- package/skills/verify-refs/references/manual_checkpoint_guide.md +100 -0
- package/skills/verify-refs/scripts/verify_cli.sh +62 -0
- package/skills/verify-refs/scripts/verify_refs.py +782 -0
- package/skills/verify-refs/skill.yml +44 -0
- package/skills/verify-refs/tests/fixtures/pagination_placeholder.bib +17 -0
- package/skills/verify-refs/tests/test_pagination_placeholder.sh +42 -0
- package/skills/version-dataset/SKILL.md +143 -0
- package/skills/version-dataset/references/manifest_schema.md +72 -0
- package/skills/version-dataset/scripts/version_dataset.py +242 -0
- package/skills/version-dataset/skill.yml +35 -0
- package/skills/version-dataset/tests/test_version_dataset.sh +52 -0
- package/skills/write-paper/SKILL.md +1148 -0
- package/skills/write-paper/references/exemplar_methods/README.md +38 -0
- package/skills/write-paper/references/exemplar_methods/ai_validation_tripod_claim.md +47 -0
- package/skills/write-paper/references/exemplar_methods/diagnostic_accuracy_stard.md +50 -0
- package/skills/write-paper/references/exemplar_methods/observational_cohort_strobe.md +43 -0
- package/skills/write-paper/references/journal_profiles/AJNR.md +185 -0
- package/skills/write-paper/references/journal_profiles/AJR.md +149 -0
- package/skills/write-paper/references/journal_profiles/Abdominal_Radiology.md +139 -0
- package/skills/write-paper/references/journal_profiles/Academic_Radiology.md +90 -0
- package/skills/write-paper/references/journal_profiles/Annals_of_Internal_Medicine.md +150 -0
- package/skills/write-paper/references/journal_profiles/Artificial_Intelligence_in_Medicine.md +82 -0
- package/skills/write-paper/references/journal_profiles/British_Journal_of_Radiology.md +161 -0
- package/skills/write-paper/references/journal_profiles/CVIR.md +157 -0
- package/skills/write-paper/references/journal_profiles/Chest.md +270 -0
- package/skills/write-paper/references/journal_profiles/Clinical_Radiology.md +160 -0
- package/skills/write-paper/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +147 -0
- package/skills/write-paper/references/journal_profiles/Diabetes_Metabolism_Journal.md +163 -0
- package/skills/write-paper/references/journal_profiles/Diagnostic_and_Interventional_Radiology.md +216 -0
- package/skills/write-paper/references/journal_profiles/Endocrinology_and_Metabolism.md +167 -0
- package/skills/write-paper/references/journal_profiles/European_Journal_of_Preventive_Cardiology.md +192 -0
- package/skills/write-paper/references/journal_profiles/European_Radiology.md +159 -0
- package/skills/write-paper/references/journal_profiles/Hepatology_Communications.md +110 -0
- package/skills/write-paper/references/journal_profiles/Hepatology_International.md +106 -0
- package/skills/write-paper/references/journal_profiles/IEEE_TMI.md +180 -0
- package/skills/write-paper/references/journal_profiles/INSI.md +163 -0
- package/skills/write-paper/references/journal_profiles/Investigative_Radiology.md +86 -0
- package/skills/write-paper/references/journal_profiles/JACC_Advances.md +197 -0
- package/skills/write-paper/references/journal_profiles/JACC_Asia.md +168 -0
- package/skills/write-paper/references/journal_profiles/JACR.md +87 -0
- package/skills/write-paper/references/journal_profiles/JAMA.md +188 -0
- package/skills/write-paper/references/journal_profiles/JAMA_Network_Open.md +170 -0
- package/skills/write-paper/references/journal_profiles/JCSM.md +266 -0
- package/skills/write-paper/references/journal_profiles/JKMS.md +201 -0
- package/skills/write-paper/references/journal_profiles/JMIR.md +88 -0
- package/skills/write-paper/references/journal_profiles/JMIR_Medical_Education.md +86 -0
- package/skills/write-paper/references/journal_profiles/JNIS.md +227 -0
- package/skills/write-paper/references/journal_profiles/JVIR.md +158 -0
- package/skills/write-paper/references/journal_profiles/Journal_of_Clinical_Endocrinology_and_Metabolism.md +191 -0
- package/skills/write-paper/references/journal_profiles/Journal_of_Stroke.md +176 -0
- package/skills/write-paper/references/journal_profiles/KJR.md +185 -0
- package/skills/write-paper/references/journal_profiles/Korean_Circulation_Journal.md +184 -0
- package/skills/write-paper/references/journal_profiles/Korean_Journal_of_Internal_Medicine.md +178 -0
- package/skills/write-paper/references/journal_profiles/Lancet_Gastroenterology_and_Hepatology.md +127 -0
- package/skills/write-paper/references/journal_profiles/Liver_International.md +165 -0
- package/skills/write-paper/references/journal_profiles/Medical_Image_Analysis.md +147 -0
- package/skills/write-paper/references/journal_profiles/NEJM.md +147 -0
- package/skills/write-paper/references/journal_profiles/Nature_Medicine.md +181 -0
- package/skills/write-paper/references/journal_profiles/Neuroradiology.md +151 -0
- package/skills/write-paper/references/journal_profiles/Nutrition_Metabolism_and_Cardiovascular_Diseases.md +184 -0
- package/skills/write-paper/references/journal_profiles/PLOS_Medicine.md +166 -0
- package/skills/write-paper/references/journal_profiles/RYAI.md +124 -0
- package/skills/write-paper/references/journal_profiles/Radiology.md +173 -0
- package/skills/write-paper/references/journal_profiles/Skeletal_Radiology.md +135 -0
- package/skills/write-paper/references/journal_profiles/Stroke.md +210 -0
- package/skills/write-paper/references/journal_profiles/The_BMJ.md +121 -0
- package/skills/write-paper/references/journal_profiles/The_Lancet.md +112 -0
- package/skills/write-paper/references/journal_profiles/The_Lancet_Digital_Health.md +104 -0
- package/skills/write-paper/references/journal_profiles/World_Journal_of_Hepatology.md +106 -0
- package/skills/write-paper/references/journal_profiles/npj_Digital_Medicine.md +93 -0
- package/skills/write-paper/references/paper_types/ai_validation.md +270 -0
- package/skills/write-paper/references/paper_types/animal_study.md +194 -0
- package/skills/write-paper/references/paper_types/case_report.md +237 -0
- package/skills/write-paper/references/paper_types/cross_national.md +328 -0
- package/skills/write-paper/references/paper_types/letter.md +127 -0
- package/skills/write-paper/references/paper_types/meta_analysis.md +181 -0
- package/skills/write-paper/references/paper_types/nhis_cohort.md +297 -0
- package/skills/write-paper/references/paper_types/original_article.md +221 -0
- package/skills/write-paper/references/paper_types/technical_note.md +131 -0
- package/skills/write-paper/references/section_guides/discussion.md +155 -0
- package/skills/write-paper/references/section_guides/introduction.md +108 -0
- package/skills/write-paper/references/section_guides/methods.md +144 -0
- package/skills/write-paper/references/section_guides/results.md +113 -0
- package/skills/write-paper/references/section_guides/step7_1_classical_qc.md +67 -0
- package/skills/write-paper/references/section_guides/step7_4a_audit_recovery.md +74 -0
- package/skills/write-paper/references/section_guides/title_abstract.md +123 -0
- package/skills/write-paper/references/section_templates/methods_statistical.md +147 -0
- package/skills/write-paper/scripts/check_placeholders.py +182 -0
- package/skills/write-paper/skill.yml +48 -0
- package/skills/write-paper/tests/test_placeholders.sh +107 -0
- package/skills/write-protocol/SKILL.md +243 -0
- package/skills/write-protocol/references/ethics_checklist.md +150 -0
- package/skills/write-protocol/references/protocol_template.md +304 -0
- package/skills/write-protocol/skill.yml +34 -0
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Open-access full-text PDF batch retrieval.
|
|
4
|
+
|
|
5
|
+
Pipeline: Unpaywall → PMC (Europe PMC REST / OA FTP / web) →
|
|
6
|
+
OpenAlex → Crossref → landing-page scrape.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python fetch_oa.py dois.txt --output pdfs/ --email user@example.com
|
|
10
|
+
python fetch_oa.py dois.txt -o pdfs/ -e user@example.com --verbose
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
import re
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
import urllib.error
|
|
21
|
+
import urllib.parse
|
|
22
|
+
import urllib.request
|
|
23
|
+
import xml.etree.ElementTree as ET
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
# Smallest payload (in bytes) that is_valid_pdf() accepts as a PDF.
MIN_PDF_BYTES = 10 * 1024
# Base User-Agent token; _ua() appends a "(mailto:...)" contact suffix to it.
USER_AGENT = "medsci-skills/1.0"

# Module-level logger used by every helper in this script.
log = logging.getLogger("fetch_oa")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ============================================================
|
|
33
|
+
# Helpers
|
|
34
|
+
# ============================================================
|
|
35
|
+
|
|
36
|
+
def _ua(email: str) -> str:
    """Return the User-Agent header value: base token plus a mailto contact."""
    contact = f"mailto:{email}"
    return f"{USER_AGENT} ({contact})"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def is_valid_pdf(data: bytes) -> bool:
    """Return True when ``data`` starts with ``%PDF-`` and is at least MIN_PDF_BYTES long."""
    # Magic-number check first, mirroring the short-circuit order of the size check.
    if not data.startswith(b"%PDF-"):
        return False
    return len(data) >= MIN_PDF_BYTES
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def fetch_bytes(url: str, email: str, accept: str = "*/*",
                timeout: int = 30) -> tuple[bytes, str, str]:
    """GET ``url`` and return ``(body, response_url, content_type)``.

    The request carries the contact User-Agent built by ``_ua(email)`` and the
    given ``Accept`` header. ``response_url`` is whatever ``geturl()`` reports
    for the response; ``content_type`` is the ``Content-Type`` header, or ``""``
    when the header is absent. Exceptions from ``urlopen`` are not caught here.
    """
    headers = {"User-Agent": _ua(email), "Accept": accept}
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        body = response.read()
        response_url = response.geturl()
        content_type = response.headers.get("Content-Type", "")
        return body, response_url, content_type
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def save_pdf(data: bytes, path: Path) -> bool:
    """Write ``data`` to ``path`` if it passes ``is_valid_pdf``.

    Returns True after a successful write and False (without touching the
    filesystem) when the payload is rejected.
    """
    if is_valid_pdf(data):
        path.write_bytes(data)
        return True
    return False
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def existing_pdf_ok(path: Path) -> bool:
    """Return True when ``path`` exists and its contents pass ``is_valid_pdf``.

    A missing path or an ``OSError`` while reading yields False.
    """
    if path.exists():
        try:
            content = path.read_bytes()
            return is_valid_pdf(content)
        except OSError:
            return False
    return False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ============================================================
|
|
72
|
+
# 1. Unpaywall
|
|
73
|
+
# ============================================================
|
|
74
|
+
|
|
75
|
+
def unpaywall_lookup(doi: str, email: str) -> str | None:
    """Ask Unpaywall for an open-access URL for ``doi``.

    Preference order: ``best_oa_location.url_for_pdf``, then the first entry of
    ``oa_locations`` that has ``url_for_pdf``, then ``best_oa_location.url``.

    Returns the chosen URL, or None when nothing usable is found or the lookup
    fails. Failures are logged, never raised: HTTP 422 (email rejected) is
    logged at warning level, everything else at debug level.
    """
    url = (f"https://api.unpaywall.org/v2/{urllib.parse.quote(doi, safe='/')}"
           f"?email={urllib.parse.quote(email)}")
    try:
        req = urllib.request.Request(url, headers={"User-Agent": _ua(email)})
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        if not isinstance(data, dict):
            log.debug("Unpaywall error for %s: unexpected payload type %s",
                      doi, type(data).__name__)
            return None
        # ``or`` guards against explicit JSON nulls as well as missing keys.
        best = data.get("best_oa_location") or {}
        if best.get("url_for_pdf"):
            return best["url_for_pdf"]
        for loc in data.get("oa_locations") or []:
            if loc and loc.get("url_for_pdf"):
                return loc["url_for_pdf"]
        if best.get("url"):
            return best["url"]
    except urllib.error.HTTPError as e:
        # Must precede the OSError clause below: HTTPError is an OSError subclass.
        if e.code == 422:
            log.warning("Unpaywall rejected email '%s' (HTTP 422). "
                        "Use a real email address, not example.com.", email)
        else:
            log.debug("Unpaywall error for %s: %s", doi, e)
    except (OSError, ValueError) as e:
        # OSError covers URLError plus read timeouts / connection resets raised
        # while reading the body (these are not wrapped in URLError).
        # ValueError covers JSONDecodeError and undecodable response bytes.
        log.debug("Unpaywall error for %s: %s", doi, e)
    return None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ============================================================
|
|
102
|
+
# 2. PMC (3-method fallback, JS-challenge resistant)
|
|
103
|
+
# ============================================================
|
|
104
|
+
|
|
105
|
+
def id_to_pmcid(identifier: str, email: str) -> str | None:
    """Convert a PMID or DOI to a PMCID via the NCBI ID converter.

    Returns the ``pmcid`` of the first record in the response, or None when the
    identifier is empty, no PMCID is reported, or the lookup fails. Failures
    are logged at debug level, never raised.
    """
    if not identifier:
        return None
    url = (f"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
           f"?ids={urllib.parse.quote(identifier, safe='/')}&format=json")
    try:
        req = urllib.request.Request(url, headers={"User-Agent": _ua(email)})
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        records = data.get("records") if isinstance(data, dict) else None
        first = records[0] if isinstance(records, list) and records else None
        if isinstance(first, dict) and first.get("pmcid"):
            return first["pmcid"]
    except (OSError, ValueError) as e:
        # OSError covers URLError/HTTPError plus read timeouts and connection
        # resets raised while reading the body; ValueError covers
        # JSONDecodeError and undecodable response bytes.
        log.debug("NCBI ID converter error for %s: %s", identifier, e)
    return None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def download_pmc_pdf(pmcid: str, outpath: Path, email: str) -> bool:
    """Try three PMC routes in turn and save the first valid PDF to ``outpath``.

    Order: Europe PMC render endpoint, then the PMC OA service (XML listing of
    links), then the PMC article ``/pdf/`` web URL. Each route logs its own
    failure at debug level and falls through to the next. Returns True as soon
    as one route saves a PDF, otherwise False.
    """
    pdf_accept = "application/pdf,*/*"

    # Method A: Europe PMC REST API (most reliable, no JS)
    try:
        europe_url = (f"https://europepmc.org/backend/ptpmcrender.fcgi"
                      f"?accid={pmcid}&blobtype=pdf")
        payload, _, _ = fetch_bytes(europe_url, email, accept=pdf_accept, timeout=30)
        if save_pdf(payload, outpath):
            log.debug("PMC Method A (Europe PMC) succeeded for %s", pmcid)
            return True
    except (urllib.error.URLError, urllib.error.HTTPError, OSError) as exc:
        log.debug("PMC Method A failed for %s: %s", pmcid, exc)

    # Method B: PMC OA FTP service (XML with direct PDF link)
    try:
        oa_url = f"https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?id={pmcid}"
        listing, _, _ = fetch_bytes(oa_url, email, timeout=15)
        tree = ET.fromstring(listing)
        # An <error> element means the article is not in the OA subset.
        if tree.find(".//error") is not None:
            log.debug("PMC Method B: %s is not in OA subset", pmcid)
        else:
            for node in tree.iter("link"):
                target = node.get("href", "")
                if not target.endswith(".pdf"):
                    continue
                if target.startswith("ftp://"):
                    # Rewrite the NCBI FTP prefix to its HTTPS equivalent.
                    target = target.replace(
                        "ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/",
                        "https://ftp.ncbi.nlm.nih.gov/pub/pmc/", 1)
                payload, _, _ = fetch_bytes(
                    target, email, accept=pdf_accept, timeout=30)
                if save_pdf(payload, outpath):
                    log.debug("PMC Method B (OA FTP) succeeded for %s", pmcid)
                    return True
    except (urllib.error.URLError, urllib.error.HTTPError,
            ET.ParseError, OSError) as exc:
        log.debug("PMC Method B failed for %s: %s", pmcid, exc)

    # Method C: Direct PMC web URL (may hit JS PoW challenge)
    try:
        web_url = f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmcid}/pdf/"
        payload, landed_url, content_type = fetch_bytes(
            web_url, email, accept=pdf_accept)
        looks_like_pdf = "pdf" in content_type.lower() or landed_url.endswith(".pdf")
        if looks_like_pdf and save_pdf(payload, outpath):
            log.debug("PMC Method C (web) succeeded for %s", pmcid)
            return True
    except (urllib.error.URLError, urllib.error.HTTPError, OSError) as exc:
        log.debug("PMC Method C failed for %s: %s", pmcid, exc)

    return False
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ============================================================
|
|
178
|
+
# 3. OpenAlex + Crossref
|
|
179
|
+
# ============================================================
|
|
180
|
+
|
|
181
|
+
def openalex_lookup(doi: str, email: str) -> list[str]:
    """Collect candidate URLs for ``doi`` from the OpenAlex works API.

    Candidates are, in order and de-duplicated: ``primary_location.pdf_url``,
    ``open_access.oa_url``, ``primary_location.landing_page_url``.

    Returns a possibly empty list. Lookup failures are logged at debug level,
    never raised.
    """
    url = (f"https://api.openalex.org/works/"
           f"https://doi.org/{urllib.parse.quote(doi, safe='/')}")
    candidates: list[str] = []
    try:
        req = urllib.request.Request(url, headers={"User-Agent": _ua(email)})
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        if not isinstance(data, dict):
            log.debug("OpenAlex error for %s: unexpected payload type %s",
                      doi, type(data).__name__)
            return candidates
        # ``or {}`` guards against explicit JSON nulls as well as missing keys.
        oa = data.get("open_access") or {}
        primary = data.get("primary_location") or {}
        for v in (primary.get("pdf_url"), oa.get("oa_url"),
                  primary.get("landing_page_url")):
            if v and v not in candidates:
                candidates.append(v)
    except (OSError, ValueError) as e:
        # OSError covers URLError/HTTPError plus read timeouts and connection
        # resets raised while reading the body; ValueError covers
        # JSONDecodeError and undecodable response bytes.
        log.debug("OpenAlex error for %s: %s", doi, e)
    return candidates
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def crossref_lookup(doi: str, email: str) -> list[str]:
    """Collect candidate URLs for ``doi`` from the Crossref works API.

    Candidates are, in order and de-duplicated: every ``message.link[].URL``,
    then ``message.resource.primary.URL``.

    Returns a possibly empty list. Lookup failures are logged at debug level,
    never raised.
    """
    url = f"https://api.crossref.org/works/{urllib.parse.quote(doi, safe='/')}"
    candidates: list[str] = []
    try:
        req = urllib.request.Request(url, headers={"User-Agent": _ua(email)})
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        if not isinstance(data, dict):
            log.debug("Crossref error for %s: unexpected payload type %s",
                      doi, type(data).__name__)
            return candidates
        msg = data.get("message") or {}
        for link in msg.get("link") or []:
            v = link.get("URL") if isinstance(link, dict) else None
            if v and v not in candidates:
                candidates.append(v)
        primary = ((msg.get("resource") or {}).get("primary") or {}).get("URL")
        if primary and primary not in candidates:
            candidates.append(primary)
    except (OSError, ValueError) as e:
        # OSError covers URLError/HTTPError plus read timeouts and connection
        # resets raised while reading the body; ValueError covers
        # JSONDecodeError and undecodable response bytes.
        log.debug("Crossref error for %s: %s", doi, e)
    return candidates
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ============================================================
|
|
223
|
+
# 4. Landing page scraper
|
|
224
|
+
# ============================================================
|
|
225
|
+
|
|
226
|
+
def scrape_pdf_candidates(html: str) -> list[str]:
    """Extract likely PDF URLs from a landing page's HTML.

    Looks for ``citation_pdf_url`` meta tags (either attribute order) and for
    ``href`` values containing ``.pdf``. Matching is case-insensitive and only
    recognises double-quoted attribute values.

    Returns the URLs in discovery order (meta tags first), de-duplicated, with
    HTML entities such as ``&amp;`` decoded so they can be requested directly.
    URLs may be relative; resolving them is left to the caller.
    """
    # Imported locally because the ``html`` parameter shadows the stdlib module name.
    from html import unescape

    patterns = [
        # <meta name="citation_pdf_url" content="...">
        r'citation_pdf_url"\s+content="([^"]+)"',
        # Same tag with the attributes in the opposite order.
        r'content="([^"]+)"\s+name="citation_pdf_url"',
        # Any link whose target mentions ".pdf" (a trailing query string is allowed).
        r'href="([^"]+\.pdf[^"]*)"',
    ]
    found: list[str] = []
    for pat in patterns:
        for raw in re.findall(pat, html, flags=re.IGNORECASE):
            # Attribute values are entity-encoded in HTML; decode before use.
            url = unescape(raw).strip()
            if url and url not in found:
                found.append(url)
    return found
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def download_from_landing(url: str, outpath: Path, email: str) -> bool:
    """Fetch ``url`` and save a PDF from it, directly or via scraped links.

    If the response's Content-Type mentions "pdf", the body itself is handed to
    ``save_pdf``. Otherwise the body is decoded as HTML, PDF link candidates
    are scraped from it, and each is tried in order until one saves. Errors on
    an individual candidate skip to the next; an error on the landing page is
    logged at debug level. Returns True only when a PDF was saved.
    """
    network_errors = (urllib.error.URLError, urllib.error.HTTPError, OSError)
    try:
        body, resolved_url, content_type = fetch_bytes(
            url, email, accept="text/html,*/*")
        if "pdf" in content_type.lower():
            return save_pdf(body, outpath)

        page = body.decode("utf-8", errors="ignore")
        for link in scrape_pdf_candidates(page):
            # Candidates may be relative; resolve against the response URL.
            target = urllib.parse.urljoin(resolved_url, link)
            try:
                payload, _, _ = fetch_bytes(
                    target, email, accept="application/pdf,*/*")
                if save_pdf(payload, outpath):
                    return True
            except network_errors:
                continue
    except network_errors as exc:
        log.debug("Landing page error for %s: %s", url, exc)
    return False
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def download_pdf(url: str, outpath: Path, email: str) -> bool:
    """Fetch ``url`` and save the body to ``outpath`` if it is a valid PDF.

    Returns the result of ``save_pdf``; network or OS errors are logged at
    debug level and reported as False.
    """
    try:
        payload, _, _ = fetch_bytes(url, email, accept="application/pdf,*/*")
        saved = save_pdf(payload, outpath)
    except (urllib.error.URLError, urllib.error.HTTPError, OSError) as exc:
        log.debug("Direct download error for %s: %s", url, exc)
        return False
    return saved
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# ============================================================
|
|
270
|
+
# 5. Main pipeline
|
|
271
|
+
# ============================================================
|
|
272
|
+
|
|
273
|
+
def gather_candidates(doi: str, email: str) -> list[str]:
    """Build the ordered, duplicate-free list of OA candidate URLs for a DOI.

    Sources are consulted in a fixed order: Unpaywall, OpenAlex, Crossref,
    and finally the plain ``https://doi.org/<doi>`` resolver URL.
    """
    # Collect everything first, in priority order...
    raw: list[str | None] = [unpaywall_lookup(doi, email)]
    raw.extend(openalex_lookup(doi, email))
    raw.extend(crossref_lookup(doi, email))
    raw.append(f"https://doi.org/{doi}")

    # ...then drop empty values and repeats while keeping first occurrences.
    unique: list[str] = []
    for candidate in raw:
        if candidate and candidate not in unique:
            unique.append(candidate)
    return unique
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def process_doi(doi: str, outdir: Path, email: str,
                pmid: str = "") -> str:
    """Try to download a PDF for one DOI and report how it was obtained.

    The strategies are attempted from cheapest to most expensive:

    1. the Unpaywall URL, when it looks like a direct PDF link;
    2. PubMed Central, resolving the PMCID from ``pmid`` or the DOI;
    3. every remaining candidate (Unpaywall, OpenAlex, Crossref, doi.org),
       downloaded directly when the URL contains ``.pdf`` and scraped as a
       landing page otherwise.

    Args:
        doi: DOI to retrieve.
        outdir: Directory receiving ``<sanitised doi>.pdf``.
        email: Contact address forwarded to every lookup/download helper.
        pmid: Optional PubMed ID, used first for the PMCID lookup.

    Returns:
        ``"skip"`` if a valid PDF already exists, ``"oa"`` or ``"pmc"``
        depending on the source that succeeded, ``"fail"`` otherwise.
    """
    safe_name = re.sub(r"[^\w\-.]", "_", doi)
    outpath = outdir / f"{safe_name}.pdf"

    if existing_pdf_ok(outpath):
        return "skip"

    # Remove stale stub left behind by an earlier, unsuccessful run.
    outpath.unlink(missing_ok=True)

    # URLs already attempted as a direct PDF download.
    tried_direct: set[str] = set()

    # Step 1: Unpaywall direct PDF URL (fastest path)
    uw_url = unpaywall_lookup(doi, email)
    if uw_url and ".pdf" in uw_url.lower():
        if download_pdf(uw_url, outpath, email):
            return "oa"
        tried_direct.add(uw_url)
        time.sleep(0.3)

    # Step 2: PMC (try before slow landing-page scraping)
    pmcid = id_to_pmcid(pmid, email) if pmid else None
    if not pmcid:
        pmcid = id_to_pmcid(doi, email)
    if pmcid and download_pmc_pdf(pmcid, outpath, email):
        return "pmc"

    # Step 3: OA candidates from OpenAlex, Crossref, landing pages
    candidates: list[str] = []

    def add(candidate: str | None) -> None:
        if candidate and candidate not in candidates:
            candidates.append(candidate)

    add(uw_url)
    for v in openalex_lookup(doi, email):
        add(v)
    for v in crossref_lookup(doi, email):
        add(v)
    add(f"https://doi.org/{doi}")

    for url in candidates:
        if ".pdf" in url.lower() and url not in tried_direct:
            ok = download_pdf(url, outpath, email)
        else:
            # Either a landing page, or a ".pdf" URL whose direct download
            # already failed in Step 1. Repeating the identical request is
            # pointless, so treat it as a page and scrape it for links.
            ok = download_from_landing(url, outpath, email)
        if ok:
            return "oa"
        time.sleep(0.3)

    return "fail"
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def read_doi_file(path: Path) -> list[dict]:
    """Read a DOI list from a plain-text or tab-separated file.

    Two layouts are recognised:

    * TSV with a header row that contains a ``DOI`` column (and optionally
      a ``PMID`` column); column names are matched case-insensitively.
    * Plain text with one DOI per line; blank lines and lines starting
      with ``#`` are ignored.

    Args:
        path: File to read (UTF-8, with or without a byte-order mark).

    Returns:
        One ``{"doi": ..., "pmid": ...}`` dict per usable row, in file
        order. ``pmid`` is ``""`` when unknown. Rows without a DOI are
        skipped.
    """
    import csv

    records = []
    # "utf-8-sig" transparently drops a leading BOM (typical for files
    # exported from spreadsheets); without it the first header would read
    # "\ufeffDOI" and never match. newline="" is what the csv module
    # expects for its input files.
    with open(path, encoding="utf-8-sig", newline="") as f:
        first_line = f.readline().strip()
        f.seek(0)

        # TSV with header containing DOI column
        if "\t" in first_line and "doi" in first_line.lower():
            reader = csv.DictReader(f, delimiter="\t")
            for row in reader:
                doi = ""
                pmid = ""
                for k, v in row.items():
                    # DictReader stores surplus fields under the key None
                    # (as a list); those are not named columns.
                    if not isinstance(k, str) or not isinstance(v, str):
                        continue
                    column = k.lower().strip()
                    if column == "doi":
                        doi = v.strip()
                    elif column == "pmid":
                        pmid = v.strip()
                if doi:
                    records.append({"doi": doi, "pmid": pmid})
        else:
            # Plain text: one DOI per line
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    records.append({"doi": line, "pmid": ""})
    return records
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def main():
    """Command-line entry point: download PDFs for every DOI in a file.

    Parses the arguments, processes each record with ``process_doi``,
    prints a per-DOI status line and a summary, and, when anything failed,
    writes ``manual_needed.txt`` into the output directory listing the
    DOIs that still have no valid PDF.
    """
    cli = argparse.ArgumentParser(
        description="Batch download open-access PDFs by DOI.")
    cli.add_argument(
        "input", type=Path,
        help="File with DOIs (one per line, or TSV with DOI column)")
    cli.add_argument(
        "-o", "--output", type=Path, default=Path("pdfs"),
        help="Output directory (default: pdfs/)")
    cli.add_argument(
        "-e", "--email", required=True,
        help="Contact email (required by Unpaywall TOS)")
    cli.add_argument(
        "-v", "--verbose", action="store_true",
        help="Show debug messages")
    opts = cli.parse_args()

    log_level = logging.WARNING
    if opts.verbose:
        log_level = logging.DEBUG
    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")

    opts.output.mkdir(parents=True, exist_ok=True)
    records = read_doi_file(opts.input)
    print(f"Loaded {len(records)} DOIs from {opts.input}")

    # Status code -> text shown at the end of each progress line.
    labels = {"oa": "OK (OA)", "pmc": "OK (PMC)",
              "fail": "FAIL", "skip": "SKIP"}
    stats = {"oa": 0, "pmc": 0, "fail": 0, "skip": 0}

    for position, entry in enumerate(records, start=1):
        doi = entry["doi"]
        pmid = entry.get("pmid", "")
        print(f" [{position}/{len(records)}] {doi}", end=" … ", flush=True)

        outcome = process_doi(doi, opts.output, opts.email, pmid)
        stats[outcome] += 1
        print(labels[outcome])
        # Pause between DOIs.
        time.sleep(0.5)

    print("\n--- Summary ---")
    for heading, key in (("OA", "oa"), ("PMC", "pmc"),
                         ("Failed", "fail"), ("Skipped", "skip")):
        print(f" {heading}: {stats[key]}")

    # Skipped DOIs are excluded from the success rate.
    succeeded = stats["oa"] + stats["pmc"]
    attempted = succeeded + stats["fail"]
    if attempted > 0:
        pct = succeeded / attempted * 100
        print(f" Success: {pct:.0f}%")

    if stats["fail"] <= 0:
        return

    # Write failed DOIs for manual retrieval
    fail_path = opts.output / "manual_needed.txt"
    with open(fail_path, "w") as handle:
        handle.write("# DOIs needing manual retrieval\n"
                     "# Options: institutional access, ILL\n\n")
        for entry in records:
            stem = re.sub(r"[^\w\-.]", "_", entry["doi"])
            if not existing_pdf_ok(opts.output / f"{stem}.pdf"):
                handle.write(f"{entry['doi']}\n")
    print(f" Manual list: {fail_path}")
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
if __name__ == "__main__":
|
|
433
|
+
main()
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Convert research paper PDFs to LLM-friendly Markdown.
|
|
4
|
+
|
|
5
|
+
Uses pymupdf4llm for high-quality extraction optimized for academic papers:
|
|
6
|
+
two-column layout handling, table preservation, header/footer removal.
|
|
7
|
+
|
|
8
|
+
Requires: pip install pymupdf4llm
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python pdf_to_md.py pdfs/ # convert all PDFs in directory
|
|
12
|
+
python pdf_to_md.py paper.pdf # convert single file
|
|
13
|
+
python pdf_to_md.py pdfs/ -o markdown/ # custom output directory
|
|
14
|
+
python pdf_to_md.py pdfs/ --pages 0-9 # first 10 pages only
|
|
15
|
+
python pdf_to_md.py pdfs/ --force # overwrite existing .md files
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import re
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
import pymupdf4llm
|
|
25
|
+
except ImportError:
|
|
26
|
+
print("Error: pymupdf4llm is not installed.", file=sys.stderr)
|
|
27
|
+
print("Install with: pip install pymupdf4llm", file=sys.stderr)
|
|
28
|
+
sys.exit(1)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_page_range(spec: str) -> list[int]:
    """Parse a page specification such as ``'0-9'`` or ``'0,2,5-7'``.

    Items are separated by commas; each item is either a single page
    number or an inclusive ``start-end`` range. Empty items (for example a
    trailing comma) are ignored, and a page named more than once is kept
    only at its first position.

    Args:
        spec: The page specification text.

    Returns:
        Page numbers in the order given, without duplicates.

    Raises:
        ValueError: If an item is not an integer or a valid range, or if a
            range ends before it starts.
    """
    pages: list[int] = []
    seen: set[int] = set()
    for part in spec.split(","):
        part = part.strip()
        if not part:
            # Tolerate "0,2," and "0,,2" instead of failing on int("").
            continue
        if "-" in part:
            start_text, end_text = part.split("-", 1)
            start, end = int(start_text), int(end_text)
            if end < start:
                # range() would silently yield nothing here.
                raise ValueError(
                    f"invalid page range {part!r}: end precedes start")
            numbers = range(start, end + 1)
        else:
            numbers = (int(part),)
        for number in numbers:
            if number not in seen:
                seen.add(number)
                pages.append(number)
    return pages
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def clean_markdown(text: str) -> str:
    """Post-process pymupdf4llm output for cleaner LLM consumption.

    Trailing whitespace is removed from every line, runs of three or more
    blank lines are reduced to two, and the result is stripped and
    terminated with exactly one newline.

    Args:
        text: Raw Markdown text.

    Returns:
        The cleaned Markdown text.
    """
    # Strip trailing whitespace per line. This has to happen first: lines
    # holding only spaces/tabs become truly empty and can then take part
    # in the blank-line collapsing below.
    text = "\n".join(line.rstrip() for line in text.splitlines())
    # Collapse excessive blank lines (3+ → 2)
    text = re.sub(r"\n{4,}", "\n\n\n", text)
    return text.strip() + "\n"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def convert_pdf(pdf_path: Path, out_dir: Path, *,
                pages: list[int] | None = None,
                force: bool = False,
                verbose: bool = False) -> bool:
    """Convert a single PDF to Markdown.

    The Markdown file is written to ``out_dir`` under the PDF's name with a
    ``.md`` suffix. A non-empty existing file is left alone unless
    ``force`` is set.

    Args:
        pdf_path: PDF to convert.
        out_dir: Directory receiving the ``.md`` file.
        pages: Page numbers to convert, or None for the whole document.
        force: Overwrite an existing non-empty ``.md`` file.
        verbose: Print per-file SKIP/OK lines and leave the converter's
            own console output visible.

    Returns:
        True when the Markdown file was written or already existed, False
        when conversion raised (the error is reported on stderr).
    """
    md_path = out_dir / pdf_path.with_suffix(".md").name

    if md_path.exists() and md_path.stat().st_size > 0 and not force:
        if verbose:
            print(f" SKIP: {md_path.name} (exists, use --force to overwrite)")
        return True

    def _flush_std_streams() -> None:
        # Python buffers stdout/stderr above the file descriptors that get
        # swapped below. Flushing keeps earlier output from landing in
        # /dev/null and keeps "silenced" output from surfacing later.
        for stream in (sys.stdout, sys.stderr):
            if stream is None:
                continue
            try:
                stream.flush()
            except (OSError, ValueError):
                pass  # best effort: a closed/broken stream must not abort

    try:
        kwargs = {
            "show_progress": False,
            # Academic paper defaults: skip images (saves tokens),
            # strict table detection for grid-line tables
            "write_images": False,
            "ignore_images": True,
            "table_strategy": "lines_strict",
        }
        if pages is not None:
            kwargs["pages"] = pages

        # Suppress pymupdf's C-level OCR/parser messages (stdout + stderr)
        # by pointing file descriptors 1 and 2 at the null device.
        if not verbose:
            import os as _os
            _flush_std_streams()
            _devnull = _os.open(_os.devnull, _os.O_WRONLY)
            _old_stdout = _os.dup(1)
            _old_stderr = _os.dup(2)
            _os.dup2(_devnull, 1)
            _os.dup2(_devnull, 2)
        try:
            md_text = pymupdf4llm.to_markdown(str(pdf_path), **kwargs)
        finally:
            if not verbose:
                try:
                    # Push anything written during the call into the null
                    # device before the real descriptors come back.
                    _flush_std_streams()
                finally:
                    _os.dup2(_old_stdout, 1)
                    _os.dup2(_old_stderr, 2)
                    _os.close(_devnull)
                    _os.close(_old_stdout)
                    _os.close(_old_stderr)
        md_text = clean_markdown(md_text)

        md_path.write_text(md_text, encoding="utf-8")
        if verbose:
            kb = len(md_text.encode("utf-8")) / 1024
            print(f" OK: {md_path.name} ({kb:.1f} KB)")
        return True
    except Exception as e:
        # Deliberately broad: one unreadable PDF must not stop a batch.
        print(f" FAIL: {pdf_path.name}: {e}", file=sys.stderr)
        return False
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def main():
    """Command-line entry point: convert one PDF or a directory of PDFs.

    Output goes to ``--output`` when given, otherwise next to the input.
    A summary with converted/failed/total counts is printed at the end.
    """
    parser = argparse.ArgumentParser(
        description="Convert research PDFs to LLM-friendly Markdown "
                    "(via pymupdf4llm).")
    parser.add_argument("input", type=Path,
                        help="PDF file or directory containing PDFs")
    parser.add_argument("-o", "--output", type=Path, default=None,
                        help="Output directory (default: same as input)")
    parser.add_argument("--pages", type=str, default=None,
                        help="Page range, e.g. '0-9' for first 10 pages")
    parser.add_argument("--force", action="store_true",
                        help="Overwrite existing .md files")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Show per-file progress")
    args = parser.parse_args()

    # Parse pages before touching the filesystem, and report a malformed
    # value as a normal usage error instead of an uncaught traceback.
    pages = None
    if args.pages:
        try:
            pages = parse_page_range(args.pages)
        except ValueError as exc:
            parser.error(f"invalid --pages value {args.pages!r}: {exc}")

    # Resolve input
    if args.input.is_file():
        pdfs = [args.input]
        default_out = args.input.parent
    elif args.input.is_dir():
        pdfs = sorted(args.input.glob("*.pdf"))
        default_out = args.input
    else:
        print(f"Error: {args.input} not found", file=sys.stderr)
        sys.exit(1)

    out_dir = args.output or default_out
    out_dir.mkdir(parents=True, exist_ok=True)

    if not pdfs:
        print("No PDF files found.")
        return

    print(f"Converting {len(pdfs)} PDF(s) → Markdown", flush=True)
    ok = 0
    fail = 0
    for pdf in pdfs:
        if convert_pdf(pdf, out_dir, pages=pages, force=args.force,
                       verbose=args.verbose):
            ok += 1
        else:
            fail += 1

    print("\n--- Summary ---")
    print(f" Converted: {ok}")
    print(f" Failed: {fail}")
    print(f" Total: {len(pdfs)}")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
if __name__ == "__main__":
|
|
160
|
+
main()
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
schema_version: 2
|
|
2
|
+
name: fulltext-retrieval
|
|
3
|
+
layer: A
|
|
4
|
+
owner_domain: literature_discovery
|
|
5
|
+
|
|
6
|
+
when_to_use: "Batch-download open-access full-text PDFs from a DOI list; optionally convert them to Markdown for token-efficient analysis."
|
|
7
|
+
when_NOT_to_use: "Finding or verifying citations (use search-lit / verify-refs). Retrieving paywalled or non-open-access content."
|
|
8
|
+
|
|
9
|
+
inputs:
|
|
10
|
+
- path: "DOI list (.txt, one per line, or .tsv with a DOI column)"
|
|
11
|
+
schema: csv
|
|
12
|
+
required: true
|
|
13
|
+
outputs:
|
|
14
|
+
- path: "downloaded open-access PDFs (pdfs/)"
|
|
15
|
+
- path: "optional PDF-to-Markdown conversions"
|
|
16
|
+
|
|
17
|
+
deterministic_scripts:
|
|
18
|
+
- fetch_oa.py
|
|
19
|
+
- pdf_to_md.py
|
|
20
|
+
side_effects:
|
|
21
|
+
- downloads_files
|
|
22
|
+
- network_access_oa_apis
|
|
23
|
+
downstream_consumers:
|
|
24
|
+
- meta-analysis
|
|
25
|
+
- obsidian-paper-vault
|
|
26
|
+
forbidden_actions:
|
|
27
|
+
- download_paywalled_content
|
|
28
|
+
- bypass_publisher_access_controls
|
|
29
|
+
|
|
30
|
+
# v2.1 quality card
|
|
31
|
+
purpose: "Resolve a DOI list to open-access full-text PDFs via legitimate OA APIs, with optional Markdown conversion."
|
|
32
|
+
safety_boundaries:
|
|
33
|
+
- "Uses legitimate open-access sources only (Unpaywall, PMC / Europe PMC, OpenAlex, Crossref); never circumvents paywalls or access controls."
|
|
34
|
+
- "Validates each download (>=10 KB and a %PDF- header) before accepting it."
|
|
35
|
+
known_limitations:
|
|
36
|
+
- "Only open-access content is retrievable; non-OA DOIs fail by design rather than fetching from unauthorized sources."
|
|
37
|
+
- "PDF-to-Markdown conversion requires the optional pymupdf4llm dependency (AGPL-3.0 or commercial license)."
|
|
38
|
+
validation_commands:
|
|
39
|
+
- "python fetch_oa.py dois.txt -o pdfs/ -e <email> --verbose # per-DOI source trace"
|
|
40
|
+
- "verify each output begins with %PDF- and is at least 10 KB"
|
|
41
|
+
evidence_surface: bundled_script
|