medsci-skills 4.9.0 → 4.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -22
- package/installers/install-macos.command +11 -2
- package/installers/install-windows.cmd +6 -2
- package/installers/install-windows.ps1 +5 -2
- package/installers/install.py +13 -0
- package/installers/update.py +19 -0
- package/metadata/distribution_files.json +454 -54
- package/metadata/distribution_manifest.json +7 -1
- package/metadata/skills_catalog.json +61 -1
- package/package.json +1 -1
- package/skills/analyze-stats/SKILL.md +40 -0
- package/skills/analyze-stats/references/analysis_guides/mendelian_randomization.md +89 -0
- package/skills/analyze-stats/references/analysis_guides/multiplicity.md +103 -0
- package/skills/analyze-stats/references/analysis_guides/network_meta_analysis.md +74 -0
- package/skills/analyze-stats/references/analysis_guides/polygenic_risk_score.md +80 -0
- package/skills/architecture-zoo/SKILL.md +96 -0
- package/skills/architecture-zoo/references/classification.md +102 -0
- package/skills/architecture-zoo/references/foundation_models.md +87 -0
- package/skills/architecture-zoo/references/index.md +55 -0
- package/skills/architecture-zoo/references/segmentation.md +110 -0
- package/skills/architecture-zoo/skill.yml +36 -0
- package/skills/check-reporting/SKILL.md +6 -2
- package/skills/check-reporting/references/appraisal_tools/METRICS_RELOADED.md +36 -0
- package/skills/check-reporting/references/checklists/PGS_RS.md +65 -0
- package/skills/check-reporting/references/checklists/STROBE_MR.md +68 -0
- package/skills/check-reporting/scripts/check_checklist_exists.py +3 -0
- package/skills/clean-data/SKILL.md +3 -1
- package/skills/clean-data/references/cleaning_patterns.md +6 -0
- package/skills/clean-data/references/implausible_value_rules.md +109 -0
- package/skills/find-journal/POLICY.md +9 -0
- package/skills/find-journal/SKILL.md +147 -13
- package/skills/find-journal/references/acceptance_signals_schema.md +126 -0
- package/skills/find-journal/references/journal_profiles/AJR.md +8 -0
- package/skills/find-journal/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +2 -2
- package/skills/find-journal/references/journal_profiles/European_Radiology.md +8 -0
- package/skills/find-journal/references/journal_profiles/Investigative_Radiology.md +8 -0
- package/skills/find-journal/references/journal_profiles/KJR.md +8 -0
- package/skills/find-journal/references/journal_profiles/RYAI.md +8 -0
- package/skills/find-journal/scripts/acceptance_readiness_challenge/expected/report_ceiling.txt +18 -0
- package/skills/find-journal/scripts/acceptance_readiness_challenge/expected/report_clean.txt +10 -0
- package/skills/find-journal/scripts/acceptance_readiness_challenge/fixture_ceiling/manuscript.md +16 -0
- package/skills/find-journal/scripts/acceptance_readiness_challenge/fixture_clean/manuscript.md +15 -0
- package/skills/find-journal/scripts/acceptance_readiness_challenge/problem.md +38 -0
- package/skills/find-journal/scripts/acceptance_readiness_challenge/verify.sh +28 -0
- package/skills/find-journal/scripts/assess_acceptance_readiness.py +235 -0
- package/skills/find-journal/skill.yml +5 -1
- package/skills/make-figures/references/critic_rubrics/data_plot.md +8 -1
- package/skills/make-figures/references/exemplar_plots/README.md +5 -0
- package/skills/make-figures/references/exemplar_plots/manhattan_plot.md +63 -0
- package/skills/make-figures/references/reporting_guideline_figure_map.md +2 -2
- package/skills/mllm-eval/SKILL.md +108 -0
- package/skills/mllm-eval/scripts/check_mllm_eval_completeness.py +184 -0
- package/skills/mllm-eval/scripts/mllm_eval_completeness_challenge/fixture/plan_bad.md +3 -0
- package/skills/mllm-eval/scripts/mllm_eval_completeness_challenge/fixture/plan_good.md +9 -0
- package/skills/mllm-eval/scripts/mllm_eval_completeness_challenge/problem.md +29 -0
- package/skills/mllm-eval/scripts/mllm_eval_completeness_challenge/verify.sh +25 -0
- package/skills/mllm-eval/skill.yml +44 -0
- package/skills/mllm-eval/tests/test_mllm_eval_completeness.sh +49 -0
- package/skills/model-card/SKILL.md +103 -0
- package/skills/model-card/references/datasheet_template.md +38 -0
- package/skills/model-card/references/metric_dimensions.md +32 -0
- package/skills/model-card/references/model_card_template.md +62 -0
- package/skills/model-card/scripts/check_model_card_complete.py +202 -0
- package/skills/model-card/scripts/check_model_card_complete_challenge/fixture/complete/DATASHEET.md +31 -0
- package/skills/model-card/scripts/check_model_card_complete_challenge/fixture/complete/MODEL_CARD.md +48 -0
- package/skills/model-card/scripts/check_model_card_complete_challenge/fixture/incomplete/MODEL_CARD.md +28 -0
- package/skills/model-card/scripts/check_model_card_complete_challenge/problem.md +31 -0
- package/skills/model-card/scripts/check_model_card_complete_challenge/verify.sh +34 -0
- package/skills/model-card/skill.yml +41 -0
- package/skills/model-card/tests/test_model_card_complete.sh +75 -0
- package/skills/model-evaluation/SKILL.md +88 -0
- package/skills/model-evaluation/references/metric_guide.md +44 -0
- package/skills/model-evaluation/scripts/check_metric_reporting.py +196 -0
- package/skills/model-evaluation/scripts/metric_reporting_challenge/fixture/clf_bad.md +2 -0
- package/skills/model-evaluation/scripts/metric_reporting_challenge/fixture/clf_good.md +3 -0
- package/skills/model-evaluation/scripts/metric_reporting_challenge/fixture/seg_bad.md +2 -0
- package/skills/model-evaluation/scripts/metric_reporting_challenge/fixture/seg_good.md +3 -0
- package/skills/model-evaluation/scripts/metric_reporting_challenge/problem.md +27 -0
- package/skills/model-evaluation/scripts/metric_reporting_challenge/verify.sh +29 -0
- package/skills/model-evaluation/skill.yml +43 -0
- package/skills/model-evaluation/tests/test_metric_reporting.sh +47 -0
- package/skills/model-scaffold/SKILL.md +120 -0
- package/skills/model-scaffold/references/training_guide.md +46 -0
- package/skills/model-scaffold/scripts/check_training_hygiene.py +254 -0
- package/skills/model-scaffold/scripts/scaffold.py +495 -0
- package/skills/model-scaffold/scripts/scaffold_challenge/expected/split_assignment.csv +13 -0
- package/skills/model-scaffold/scripts/scaffold_challenge/fixture/manifest.csv +19 -0
- package/skills/model-scaffold/scripts/scaffold_challenge/problem.md +44 -0
- package/skills/model-scaffold/scripts/scaffold_challenge/verify.sh +89 -0
- package/skills/model-scaffold/skill.yml +46 -0
- package/skills/model-scaffold/tests/fixtures/bad_evaluate.py +10 -0
- package/skills/model-scaffold/tests/fixtures/bad_train.py +9 -0
- package/skills/model-scaffold/tests/test_training_hygiene.sh +70 -0
- package/skills/model-validation/SKILL.md +146 -0
- package/skills/model-validation/scripts/check_split_leakage.py +269 -0
- package/skills/model-validation/scripts/check_split_leakage_challenge/expected/clean.txt +9 -0
- package/skills/model-validation/scripts/check_split_leakage_challenge/expected/leak.txt +9 -0
- package/skills/model-validation/scripts/check_split_leakage_challenge/fixture/split_seed.txt +1 -0
- package/skills/model-validation/scripts/check_split_leakage_challenge/fixture/splits_clean.csv +14 -0
- package/skills/model-validation/scripts/check_split_leakage_challenge/fixture/splits_leak.csv +15 -0
- package/skills/model-validation/scripts/check_split_leakage_challenge/problem.md +39 -0
- package/skills/model-validation/scripts/check_split_leakage_challenge/verify.sh +37 -0
- package/skills/model-validation/skill.yml +43 -0
- package/skills/model-validation/tests/fixtures/leak_subject.csv +6 -0
- package/skills/model-validation/tests/fixtures/noseed_clean.csv +5 -0
- package/skills/model-validation/tests/fixtures/single_partition.csv +4 -0
- package/skills/model-validation/tests/test_split_leakage.sh +78 -0
- package/skills/orchestrate/SKILL.md +1 -1
- package/skills/peer-review/SKILL.md +30 -0
- package/skills/peer-review/references/domain-probes/mendelian_randomization.md +62 -0
- package/skills/peer-review/references/domain-probes/mllm_evaluation.md +102 -0
- package/skills/peer-review/references/domain-probes/model_development.md +52 -0
- package/skills/peer-review/references/domain-probes/network_meta_analysis.md +55 -0
- package/skills/peer-review/references/domain-probes/observational_confounding.md +14 -2
- package/skills/peer-review/references/domain-probes/polygenic_risk_score.md +54 -0
- package/skills/self-review/SKILL.md +5 -0
- package/skills/self-review/references/domain-probes/mendelian_randomization.md +62 -0
- package/skills/self-review/references/domain-probes/mllm_evaluation.md +102 -0
- package/skills/self-review/references/domain-probes/model_development.md +52 -0
- package/skills/self-review/references/domain-probes/network_meta_analysis.md +55 -0
- package/skills/self-review/references/domain-probes/observational_confounding.md +14 -2
- package/skills/self-review/references/domain-probes/polygenic_risk_score.md +54 -0
- package/skills/write-paper/references/journal_profiles/Clinical_and_Molecular_Hepatology.md +2 -1
package/README.md
CHANGED
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
# MedSci Skills
|
|
4
4
|
|
|
5
|
-
**
|
|
5
|
+
**51 skills that actually work.** Built by a physician-researcher, tested on real publications.
|
|
6
6
|
|
|
7
|
-
*MedSci Skills is a submission-grade clinical manuscript workflow, not a generic biomedical skill catalog. Its moat is the compliance layer —
|
|
7
|
+
*MedSci Skills is a submission-grade clinical manuscript workflow, not a generic biomedical skill catalog. Its moat is the compliance layer — 38 reporting guidelines and risk-of-bias tools, reference/citation verification, and deterministic integrity gates, before peer review sees the manuscript. It competes on clinical submission reliability, not skill count.*
|
|
8
8
|
|
|
9
9
|
[](LICENSE)
|
|
10
10
|
[](https://github.com/Aperivue/medsci-skills/releases/latest)
|
|
11
11
|
[](https://github.com/Aperivue/medsci-skills/actions/workflows/validate.yml)
|
|
12
|
-

|
|
13
13
|
[](https://www.npmjs.com/package/medsci-skills)
|
|
14
14
|
[](https://youtu.be/MclQ_RIofpE)
|
|
15
15
|
[](https://github.com/Aperivue/medsci-skills/contribute)
|
|
@@ -64,6 +64,12 @@ every output requires human-expert verification. New here? See the
|
|
|
64
64
|
npx medsci-skills install # copies every skill into your agent's folder
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
+
**Recommended (especially for clinicians):** add `--enable-update-notify` so Claude Code shows a one-line *"update available"* notice when a new version ships — otherwise you stay on the version you installed and are never told that a newer one exists. (No terminal at all? The classroom installer below turns this on for you.)
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
npx medsci-skills install --enable-update-notify # install + in-app update reminders
|
|
71
|
+
```
|
|
72
|
+
|
|
67
73
|
**Have git?** Install every skill in three commands:
|
|
68
74
|
|
|
69
75
|
```bash
|
|
@@ -268,48 +274,54 @@ The E2E pipeline (`orchestrate --e2e`) produces everything up to `qc/`. The `sub
|
|
|
268
274
|
|
|
269
275
|
## What's New
|
|
270
276
|
|
|
271
|
-
**v4.
|
|
277
|
+
**v4.10** — reviewer-coverage expansion reverse-engineered from high-IF, CC-BY papers (learn-only under the `reverse_engineer/` license firewall), plus a clinician-friendly update path. Additive and backward-compatible; 45 skills / **38 guidelines** / 36 detectors / **15 domain-probe modules** (was 12):
|
|
278
|
+
|
|
279
|
+
- **Three new reviewer domain-probe modules** (`/peer-review` + `/self-review`, vendored byte-identical): **Mendelian randomization** (MR1–MR8 — IV assumptions, pleiotropy-robust sensitivity suite, Steiger, sample overlap, NLMR, drug-target colocalization), **polygenic risk score** (PG1–PG8 — ancestry portability, base/target leakage, incremental value over the clinical model, screening-vs-discrimination, calibration), and **network meta-analysis** (NM1–NM8 — transitivity, incoherence, SUCRA over-interpretation, CINeMA/GRADE-NMA, component-NMA additivity). Plus observational **O17** (agnostic many-exposure-scan multiplicity: ExWAS/EWAS/MWAS).
|
|
280
|
+
- **Two reporting-guideline checklists** (36 → 38): **STROBE-MR** and **PGS-RS / PRS-RS**, with study-type routing. Four new `/analyze-stats` analysis guides (multiplicity, MR, PRS, NMA) and a `/clean-data` implausible-value + cross-field validity reference.
|
|
281
|
+
- **Clinician-friendly update reminders** — the classroom installers enable the in-app "update available" notice + one-click Desktop updater by default; the `npx`/manual paths print how to turn it on; the install guide recommends `npx medsci-skills install --enable-update-notify`.
|
|
282
|
+
|
|
283
|
+
**v4.9** — analysis-integrity hardening promoted from real review cycles, plus journal-mechanics additions. Additive and backward-compatible; still 45 skills / 36 guidelines, analysis-integrity detectors **32 → 36**:
|
|
272
284
|
|
|
273
285
|
- **Four new gates** — a **duplicate-bibliography** check (`check_reference_duplication.py`) for the hybrid `[@key]` + hand-typed `## References` build that renders the list twice; a **cross-script binning / composite-indicator** consistency check (`check_binning_consistency.py`, `BINNING_DRIFT` / `DERIVED_DEF_DRIFT`) for a derived categorical or composite indicator defined inconsistently across analysis scripts; a **float citation-order** check (`check_citation_order.py`) for numbered Tables/Figures not first cited in ascending order per series; and an **audit-dump leak** gate (`/sync-submission`) that blocks a `/check-reporting` output mistakenly attached as a submission file.
|
|
274
286
|
- **KJR technical-check conventions + percentage-decimal style**, reader-allocation-under-burden and generative-image-as-study-object reporting (`/design-ai-benchmarking`, `/check-reporting`), and a **Liver International** CSL with that journal's submission mechanics (`/manage-refs`).
|
|
275
287
|
|
|
276
|
-
**v4.8** is the **review-harvest batch** — deterministic detector hardening promoted from real-manuscript review cycles. Additive and backward-compatible; still 45 skills /
|
|
288
|
+
**v4.8** is the **review-harvest batch** — deterministic detector hardening promoted from real-manuscript review cycles. Additive and backward-compatible; still 45 skills / 36 guidelines, analysis-integrity detectors **30 → 32**:
|
|
277
289
|
|
|
278
290
|
- **Two new gates** — `check_supplement_hygiene.py` lints the rendered supplement / tables / caption files (not just the manuscript) for §-labels, placeholders, build markers, response-letter framing, and unresolved body↔supplement cross-references; `check_null_calibration.py` flags a headline negative/equivalence claim made without a minimum-detectable-effect / power / equivalence statement.
|
|
279
291
|
- **Four detector false-positive fixes** — gates no longer fire on a recommended colorblind-safe palette, author-footnote `§` daggers, a correctly-hedged disclaimer, or a tier-label digit; each with a regression fixture and three newly CI-wired test suites.
|
|
280
292
|
- **Nine reviewer-side domain probes** (SR/MA, observational, diagnostic, AI-overclaiming, survival) plus a `/design-study` design-stage ceiling gate for perceptual/reader-AI studies and a reusable confidence-weighted-rating→AUC monotonicity template.
|
|
281
293
|
|
|
282
|
-
**v4.7** is the **self-update foundation** — physician-researchers stay current without GitHub, git, or a terminal. Additive and backward-compatible; still 45 skills /
|
|
294
|
+
**v4.7** is the **self-update foundation** — physician-researchers stay current without GitHub, git, or a terminal. Additive and backward-compatible; still 45 skills / 36 guidelines / 30 detectors:
|
|
283
295
|
|
|
284
296
|
- **Transactional, crash-recoverable installer.** Each install runs through a durable journal state machine recovered on the next run (roll back / forward-clean / fail-closed), with per-target SHA-256 inventories — your modified or third-party skills are backed up and never clobbered or auto-deleted.
|
|
285
297
|
- **One-click self-updater** (`~/.medsci-skills/updater/`, `install.py --check-update`). Verifies the download against the github.com API digest and **never `extractall()`s** (per-entry rejection of traversal / symlink / duplicate / zip-bomb + an allowlist & per-file hash). The release pipeline injects a verified `provenance.json`, attests build provenance, runs on a protected `release` environment, and verifies each ZIP round-trips through the updater's own safe-extract before publishing.
|
|
286
298
|
- **Opt-in update notice (off by default):** `install.py --enable-update-notify` shows a one-line "update available" message at Claude Code session start — no telemetry, reads nothing about your session, installs nothing. `--disable-update-notify` / `MEDSCI_NO_UPDATE_CHECK=1` turn it off. *(Honest scope: the digest/attestation detect transport tampering, not a compromised publisher account — see `SECURITY.md`.)*
|
|
287
299
|
|
|
288
|
-
**v4.6** is a maintainability, governance, and review-depth release — still 45 skills /
|
|
300
|
+
**v4.6** is a maintainability, governance, and review-depth release — still 45 skills / 36 guidelines; analysis-integrity detectors **28 → 30**, domain probes 11 → 12:
|
|
289
301
|
|
|
290
302
|
- **Fairness / equity / subgroup-performance probe (EQ0–EQ6)** for AI/prediction/diagnostic studies that claim cross-population performance, plus two new detectors: an **AI-disclosure + data/code-availability** check (`/sync-submission`) and a **structured-summary-box conformance** check (`/academic-aio`).
|
|
291
303
|
- **Governance + answer-engine layer:** `ROADMAP.md`, `MAINTAINERS.md`, `SECURITY.md`, a maintainer workflow + release checklist, an AEO/GEO `docs/faq.md`, a "Start here: 3 workflows" + "Validation status" section in this README, and a new `maturity` field (official / experimental / community) on every skill.
|
|
292
304
|
- **Token diet (pilot):** `write-paper` Phase 7 integrity audits moved to a load-on-demand reference (~2,559 tokens saved per invocation). Positioning now leads with the compliance moat rather than skill count.
|
|
293
305
|
|
|
294
|
-
**v4.5** deepens the review + submission surface with no new skill or reporting-guideline count (still 45 skills /
|
|
306
|
+
**v4.5** deepens the review + submission surface with no new skill or reporting-guideline count (still 45 skills / 36 guidelines); analysis-integrity detectors **27 → 28**:
|
|
295
307
|
|
|
296
308
|
- **`/clean-data` + `/analyze-stats` — reverse-coded-item / negative-alpha detector.** A multi-item Likert scale with a negatively-worded item must be recoded `(min+max) − x` before the scale total or Cronbach's alpha is computed; left un-recoded, the item correlates negatively with the rest of the scale and alpha collapses (often negative). A negative alpha is a coding bug, not a "multidimensional construct." New stdlib-only `check_reverse_coding.py` returns `REVERSE_CODING_LIKELY` / `REVERSE_CODING_SUSPECT` / `OK` from per-item item-rest correlations + raw alpha; the Likert summary template gains a `--reverse-items` recode flag.
|
|
297
309
|
- **`/peer-review` + `/self-review` — SR/MA + DTA + prediction-model probe batch.** `sr_ma.md` **P12** risk-of-bias table row-sum ↔ traffic-light figure-matrix reconciliation and **P13** included-study ↔ reference-list completeness; `diagnostic_accuracy.md` **D7** index-test-as-enrollment-criterion circularity; `clinical_prediction_model.md` **CP5** intended-use horizon leakage and **CP6** development/CV vs held-out/external validation-nomenclature conflation. Vendored byte-identical into `/self-review`.
|
|
298
310
|
- **`/sync-submission` — embedded absolute-path leak scan.** A `word/*.xml` attribute (e.g. a pandoc-embedded image's `<pic:cNvPr descr="…">`) carrying an absolute home-dir path (`/Users/…`, `/home/…`) is a username leak invisible to a rendered-text scan; now flagged as `docx_embedded_abs_path` under `check_asset_anonymization.py`.
|
|
299
311
|
|
|
300
|
-
**v4.4** adds reviewer/analysis depth with no new skill or reporting-guideline count (still 45 skills /
|
|
312
|
+
**v4.4** adds reviewer/analysis depth with no new skill or reporting-guideline count (still 45 skills / 36 guidelines / 27 detectors):
|
|
301
313
|
|
|
302
314
|
- **`/author-strategy` — trajectory-archetype classification (optional).** Classifies a queried author's PubMed trajectory into abstract career archetypes (A1 infrastructure builder, A2 methodology rule-maker, A3 clinical→AI hybrid, A4 SR/MA volume engine, A5 large-consortium participation, A6 device/technique depth, + a computed composite) as an **explainable, multi-label, confidence-scored heuristic — not an objective verdict**. The rubric is a single canonical YAML (the narrative doc is generated from it); scores exclude `unavailable` signals (h-index/citation/venue-tier → `[VERIFY]`, never fabricated); a **disambiguation gate** binds an approved `corpus_manifest.json` to the CSV (csv + PMID-set hashes) so a surname alone never classifies, and target-author attribution never borrows a co-author's ORCID/affiliation.
|
|
303
315
|
- **`/peer-review` + `/self-review` — Image-Synthesis / cross-modality probe (IS1–IS4)** for studies that synthesize one imaging modality from another and claim the output carries the target's information, plus a reviewer-side reference-integrity spot-check.
|
|
304
316
|
- **`/verify-refs` — OpenAlex tertiary index** recovers conference-proceedings / non-DOI citations (NeurIPS/ICLR/ACL) that fall through PubMed and CrossRef, the free analogue of a portal's second index.
|
|
305
317
|
|
|
306
|
-
**v4.3** hardens the **cross-sectional / observational cohort** review surface end-to-end, much of it reverse-engineered from real CC-BY cohort papers (learn-only under the license firewall) — no new skill or reporting-guideline count (still 45 skills /
|
|
318
|
+
**v4.3** hardens the **cross-sectional / observational cohort** review surface end-to-end, much of it reverse-engineered from real CC-BY cohort papers (learn-only under the license firewall) — no new skill or reporting-guideline count (still 45 skills / 36 guidelines); analysis-integrity detectors **25 → 27**:
|
|
307
319
|
|
|
308
320
|
- **Observational probes O1 → O14** (`/peer-review` + `/self-review`, vendored) — over-adjustment / analysis-unit clustering / outcome construct-validity (O7–O9), overlapping-subset gradient (O10), **complex-survey design & weighting** for NHANES/KNHANES (O11), **data-driven threshold / "inflection-point" mining** (O12), **cross-sectional mediation** temporal-order & sequential-ignorability (O13), and **interaction scale** — additive RERI/AP/S vs multiplicative (O14). Plus a new **clinical-prediction-model** probe module **CP1–CP4** and survival **S9** (panel-data / multistate variance).
|
|
309
321
|
- **Two new detectors (25 → 27)** — `check_wordcount_cap.py` (the revision-inflation trap: body vs journal cap) and `check_paren_spans.py` (em-dash→paren conversions that wrap a whole sentence). Plus a `check_confounding_completeness.py` upgrade (DB-code↔prose alias map, SMD-from-mean±SD, exposure-defining-covariate exemption), a `check_cohort_arithmetic.py` `ANALYSIS_UNIT_UNDISCLOSED` check, a `check_scope_coherence.py` cross-sectional-yield lexicon, and a verify-refs corporate/collective-author render-abort fix.
|
|
310
322
|
- **Analysis & submission tooling** — `/analyze-stats` gains **mediation** and **interaction & effect-modification** guides; `/sync-submission` gains `assemble_supplement.py` (S{N} index↔file integrity) and a `/revise` body-word-count exit gate; `/render-pdf-doc` gains a `scan_glyph_coverage.py` xelatex silent-glyph-drop scan.
|
|
311
323
|
|
|
312
|
-
**v4.2** builds out the case-report capability end-to-end, grounded in real CC-BY case reports (learn-only under the license firewall) — no new skill or reporting-guideline count (still 45 skills /
|
|
324
|
+
**v4.2** builds out the case-report capability end-to-end, grounded in real CC-BY case reports (learn-only under the license firewall) — no new skill or reporting-guideline count (still 45 skills / 36 guidelines); journal profiles **68 → 73**:
|
|
313
325
|
|
|
314
326
|
- **Case-report + case-series writing** — `/write-paper` gains a CARE narrative + 150-word-abstract case-report exemplar, a **case-series** paper type (methods-light mini-cohort, all-cases summary table, counts-not-rates), and **adverse-event/pharmacovigilance** (Naranjo/WHO-UMC causality) and **diagnostic-pitfall/mimic** subtypes.
|
|
315
327
|
- **Radiology / imaging-led track** — a dedicated `exemplar_case_report_radiology.md` (per-modality technique→findings→impression, structured-reporting lexicons BI-RADS/LI-RADS/PI-RADS/TI-RADS/Lung-RADS/O-RADS, quantitative threshold honesty, an interventional-radiology procedure/complication subtype, DICOM de-identification) plus a `/make-figures` annotated multimodality imaging-panel exemplar.
|
|
@@ -433,12 +445,18 @@ ma-scout -> search-lit -> fulltext-retrieval -> design-study ──> write-proto
|
|
|
433
445
|
| **search-lit** | PubMed + Semantic Scholar + bioRxiv search with anti-hallucination citation verification. Token-efficient error handling -- CrossRef failures are silently batched, not repeated. BibTeX output tags each entry with `verified`/`verified_by`/`verified_on` fields so downstream skills can trust the citation provenance. |
|
|
434
446
|
| **verify-refs** | Pre-submission reference audit for `.md`, `.docx`, `.bib`, or `.tsv` inputs. Extracts references, verifies DOI/PMID via CrossRef/PubMed when available, and writes `qc/reference_audit.json` as the sole output — row-level status (OK / MISMATCH / UNVERIFIED / FABRICATED) lives inside the JSON `records[]` block. `/search-lit` produces candidate BibTeX; `/lit-sync` owns `manuscript/_src/refs.bib`. |
|
|
435
447
|
| **fulltext-retrieval** | Batch open-access PDF downloader. Unpaywall → PMC → OpenAlex → CrossRef pipeline. OA-only -- no paywall bypass. Input: DOI list or TSV. Optional PDF→Markdown conversion via [pymupdf4llm](https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/) for token-efficient LLM analysis of academic papers. |
|
|
436
|
-
| **check-reporting** | Manuscript compliance audit against
|
|
448
|
+
| **check-reporting** | Manuscript compliance audit against 38 reporting guidelines and risk-of-bias tools (STROBE, STROBE-MR, STARD, STARD-AI, TRIPOD, TRIPOD+AI, TRIPOD-LLM, PGS-RS, PRISMA, PRISMA-DTA, PRISMA-P, MOOSE, ARRIVE, CONSORT, CONSORT-AI, CARE, SPIRIT, SPIRIT-AI, CLAIM, DECIDE-AI, SQUIRE 2.0, CLEAR, GRRAS, MI-CLEAR-LLM, SWiM, AMSTAR 2, QUADAS-2, QUADAS-C, RoB 2, ROBINS-I, ROBINS-E, ROBIS, ROB-ME, PROBAST, PROBAST+AI, NOS, COSMIN, RoB NMA). Machine-readable JSON summary with `compliance_pct` and `fixable_by_ai` flags for automated pipeline integration. |
|
|
437
449
|
| **analyze-stats** | Statistical analysis code generation (Python/R) for diagnostic accuracy, DTA meta-analysis (bivariate/HSROC), inter-rater agreement, survival analysis, demographics tables, regression (logistic/linear), propensity score (matching/IPTW/overlap weighting), and repeated measures (RM ANOVA/GEE/mixed models). Calibration mandatory for prediction models. |
|
|
438
450
|
| **meta-analysis** | Full systematic review and meta-analysis pipeline (8 phases). DTA (bivariate/HSROC) and intervention meta-analysis. Protocol to submission-ready manuscript with PRISMA-DTA compliance. |
|
|
439
451
|
| **make-figures** | Publication-ready figures and visual abstracts: ROC curves, forest plots, PRISMA/CONSORT/STARD flow diagrams, Kaplan-Meier curves, Bland-Altman plots, confusion matrices, and journal-specific visual/graphical abstracts (python-pptx template-based). Communication-first design principles (Nat Hum Behav 2026 — key message, audience, cognitive load, figure-vs-table decision) and five flow-diagram production lessons (official-template fidelity, VML fallback PDF export, docx XML escape, sequential placeholder mapping, version freeze); critic rubric Section G adds 5 communication-first checks. `--study-type` auto-generates the full required figure set; structured `_figure_manifest.md` output for downstream pipeline consumption; D2 enforced as default for flow diagrams. |
|
|
440
452
|
| **design-study** | Study design review: identifies analysis unit, cohort logic, data leakage risks, comparator design, validation strategy, and reporting guideline fit. |
|
|
441
453
|
| **design-ai-benchmarking** | Design and validity review for benchmarking AI system(s) against a human-expert panel: evaluation-question and arm definition, decoupled multi-dimensional rubrics with anchors, planted calibration probes (positive-control / known-bad / instability / mechanism-contradiction), reviewer-panel construction with per-reviewer randomization, inter-rater reliability targets with separate control-item reliability, LLM-as-judge vs human-as-judge adjudication, construct-independence guards, and a structured JSON rating-export schema. Locks the rubric before data collection. |
|
|
454
|
+
| **model-validation** | Design or audit the clinical-validation study for an engineer-built medical-imaging model (segmentation / classification / detection): patient-level split disjointness and the data-leakage taxonomy, tuning-on-test, internal vs genuine external validation, comparator design, single-run vs multi-seed variance, task-correct metric selection (Metrics Reloaded), test-set sizing, and CLAIM 2024 / TRIPOD+AI / STARD-AI reporting fit. Ships a deterministic split-leakage gate that proves patient disjointness by set arithmetic on the emitted split table. Integrates with MONAI / nnU-Net — does not replace them. |
|
|
455
|
+
| **model-scaffold** | Generate a reproducible, runnable PyTorch training repo for a medical-imaging segmentation task — the missing middle link between choosing an architecture and validating a trained model. Emits a patient-level seed-locked split as an auditable artifact, a configurable U-Net, train/evaluate scripts that seed every RNG and infer under eval mode, a config, requirements, a reproducibility record, and a Methods stub with VERIFY placeholders (no fabricated numbers). Reproducibility holds by construction; ships a `check_training_hygiene` AST gate + a network-free build→validate challenge. Integrates with MONAI / nnU-Net / TorchIO — does not reimplement them. |
|
|
456
|
+
| **architecture-zoo** | "Which architecture for which research question" decision tool: maps task (classification / segmentation / detection / transfer), modality, data scale, and class imbalance to a paper-grounded architecture shortlist. Curates the foundational curriculum (ResNet / DenseNet / EfficientNet / ViT / Swin; U-Net / 3-D U-Net / Attention & Residual U-Net / nnU-Net / Mask R-CNN; SAM/MedSAM / TotalSegmentator / BiomedCLIP / DINO / MAE / SimCLR) — each with core idea, when-to-use, medical-imaging use, reference implementation, validation setup, and the matching model-scaffold template. Advisory; teaches archetypes, not a live SOTA leaderboard. |
|
|
457
|
+
| **model-card** | Generate the documentation an engineer-built medical-imaging model must carry — a Model Card (Mitchell et al. 2019), a Datasheet for its dataset (Gebru et al. 2021), and a METRIC-informed data-quality pass — filled from user-supplied facts (never fabricated), then verify every required section is present and non-empty with a deterministic completeness gate (`check_model_card_complete`). Model Card / Datasheet are documentation standards vendored as templates, not counted reporting checklists. |
|
|
458
|
+
| **model-evaluation** | Compute and report task-correct held-out metrics for a trained medical-imaging model — segmentation (Dice + a boundary metric HD95/NSD, per structure), classification (AUROC + AUPRC + sensitivity/specificity with bootstrap CIs at the deployment prevalence), or detection (FROC/mAP with a stated IoU criterion) — plus calibration and subgroup slices. Emits a per-case table for analyze-stats and gates the metric choice against Metrics Reloaded / CLAIM 2024 (`check_metric_reporting`). Numbers come only from executed code. |
|
|
459
|
+
| **mllm-eval** | Model-agnostic evaluation harness (closed API or open weights) for an LLM/MLLM on a clinical task — radiology report generation, VQA, clinical text extraction — covering the adjudicated reference standard, clinical-efficacy metrics (RadGraph-F1 / CheXbert-F1 beyond BLEU/ROUGE), faithfulness/hallucination, pretraining-contamination, prompt sensitivity, and a reader study; gates the plan with `check_mllm_eval_completeness` and routes the reviewer audit to the MLLM probe. |
|
|
442
460
|
| **intake-project** | Classifies new research projects, summarizes current state, identifies missing inputs, and recommends next steps. |
|
|
443
461
|
| **grant-builder** | Structures grant proposals: significance, innovation, approach, milestones, and consortium roles. |
|
|
444
462
|
| **present-paper** | Academic presentation preparation: paper analysis, supporting research, speaker scripts, slide note injection, and Q&A prep. |
|
|
@@ -497,6 +515,8 @@ After unzipping:
|
|
|
497
515
|
- Windows: double-click `installers/install-windows.cmd`
|
|
498
516
|
- macOS: double-click `installers/install-macos.command`
|
|
499
517
|
|
|
518
|
+
This turnkey install also **turns on in-app update reminders** and adds an **"Update MedSci Skills"** Desktop icon, so you are told when a new version ships and can update with one click — no terminal needed (see [Updating](#updating)).
|
|
519
|
+
|
|
500
520
|
Then restart Claude Code Desktop, Codex Desktop, or Cursor and test with:
|
|
501
521
|
|
|
502
522
|
```text
|
|
@@ -548,19 +568,22 @@ See [docs/classroom_distribution_plan.md](docs/classroom_distribution_plan.md) a
|
|
|
548
568
|
|
|
549
569
|
MedSci Skills updates often. You do **not** need GitHub, git, or the command line to stay current.
|
|
550
570
|
|
|
551
|
-
- **One click (recommended for the classroom install).**
|
|
552
|
-
|
|
553
|
-
icon on your Desktop
|
|
554
|
-
|
|
571
|
+
- **One click (recommended for the classroom install).** The classroom installer (Option 1) now
|
|
572
|
+
sets this up for you automatically — it places an updater at `~/.medsci-skills/updater/`, drops an
|
|
573
|
+
**"Update MedSci Skills"** icon on your Desktop (`--desktop-launcher`), and **turns on the in-app
|
|
574
|
+
update reminder** (below). Double-click the icon: it downloads the latest release from GitHub,
|
|
575
|
+
verifies it, and re-installs — transactionally, so an interrupted update never corrupts your install.
|
|
555
576
|
- **Already installed an old copy?** Re-download the latest classroom ZIP **once** and double-click
|
|
556
577
|
the installer; from then on the one-click updater is in place for every future update.
|
|
557
578
|
- **Terminal users:** `npx medsci-skills@latest install` always installs the latest.
|
|
558
579
|
- **Just checking:** `python3 installers/install.py --check-update` reports whether a newer version
|
|
559
580
|
is available and installs nothing.
|
|
560
|
-
- **Get reminded (
|
|
561
|
-
shows a one-line *"update available"* notice
|
|
562
|
-
|
|
563
|
-
|
|
581
|
+
- **Get reminded (Claude Code):** `python3 installers/install.py --enable-update-notify` (or
|
|
582
|
+
`npx medsci-skills install --enable-update-notify`) shows a one-line *"update available"* notice
|
|
583
|
+
when a Claude Code session starts. **The classroom installer enables this for you;** for the
|
|
584
|
+
`npx`/manual paths it is **off by default** (the installer prints how to turn it on). It checks at
|
|
585
|
+
most once a day, reads nothing about your session, and never installs anything. Turn it off with
|
|
586
|
+
`--disable-update-notify`, or silence it with `MEDSCI_NO_UPDATE_CHECK=1`.
|
|
564
587
|
- **Claude Code plugin marketplace:** third-party marketplace **auto-update is off by default** —
|
|
565
588
|
enable it in Claude Code or run a manual plugin update.
|
|
566
589
|
|
|
@@ -591,8 +614,8 @@ Projects declare their source-of-truth layout in `SSOT.yaml`, and a `qc/migratio
|
|
|
591
614
|
### Meta-Analysis Failure Modes
|
|
592
615
|
`/meta-analysis` ships empirical failure-mode references (data integrity, review orchestration, submission package drift, post-submission release ops) with four automation hooks: `scripts/prisma_5way_consistency.py` (DI-6 PRISMA number consistency), `scripts/extraction_consensus_log_init.py` (DI-1 dual-extraction scaffold), `scripts/tag_cleanup_gate.sh` (DI-8 placeholder tag gate), and `scripts/verify_package_integrity.py` (SPD SHA-256 manifest for submission bundles).
|
|
593
616
|
|
|
594
|
-
###
|
|
595
|
-
`check-reporting` includes bundled checklists for
|
|
617
|
+
### 38 Reporting Guidelines & RoB Tools Built-in
|
|
618
|
+
`check-reporting` includes bundled checklists for 38 guidelines and risk-of-bias tools: STROBE, STROBE-MR, STARD, STARD-AI, TRIPOD, TRIPOD+AI, TRIPOD-LLM, PGS-RS, PRISMA 2020, PRISMA-DTA, PRISMA-P, MOOSE, ARRIVE, CONSORT, CONSORT-AI, CARE, SPIRIT, SPIRIT-AI, CLAIM, DECIDE-AI, SQUIRE 2.0, CLEAR, GRRAS, MI-CLEAR-LLM, SWiM, AMSTAR 2, QUADAS-2, QUADAS-C, RoB 2, ROBINS-I, ROBINS-E, ROBIS, ROB-ME, PROBAST, PROBAST+AI, NOS, COSMIN, RoB NMA. Includes Results/Discussion section boundary checks and a machine-readable JSON summary for pipeline integration.
|
|
596
619
|
|
|
597
620
|
### Publication-Ready Output
|
|
598
621
|
`analyze-stats` generates reproducible Python/R code for 13 analysis types -- including regression, propensity score, and repeated measures -- with mandatory calibration for prediction models. `make-figures` produces journal-specification figures (300 DPI, colorblind-safe palettes, proper dimensions), visual/graphical abstracts, and a tool selection guide (D2 for flow diagrams, matplotlib for data plots). `--study-type` auto-generates the complete figure set for each study design.
|
|
@@ -6,10 +6,19 @@ cd "$(dirname "$0")/.."
|
|
|
6
6
|
echo "MedSci Skills Installer for macOS"
|
|
7
7
|
echo
|
|
8
8
|
|
|
9
|
+
PY=""
|
|
9
10
|
if command -v python3 >/dev/null 2>&1; then
|
|
10
|
-
python3
|
|
11
|
+
PY=python3
|
|
11
12
|
elif command -v python >/dev/null 2>&1; then
|
|
12
|
-
python
|
|
13
|
+
PY=python
|
|
14
|
+
fi
|
|
15
|
+
|
|
16
|
+
if [ -n "$PY" ]; then
|
|
17
|
+
"$PY" installers/install.py --target all --desktop-launcher
|
|
18
|
+
# Turn on the in-app "update available" reminder for this turnkey install so you are told when a
|
|
19
|
+
# new version is out (no terminal needed afterward). Best-effort; turn off later with
|
|
20
|
+
# `install.py --disable-update-notify` or MEDSCI_NO_UPDATE_CHECK=1.
|
|
21
|
+
"$PY" installers/install.py --enable-update-notify || true
|
|
13
22
|
else
|
|
14
23
|
echo "Python was not found."
|
|
15
24
|
echo "Install Python 3 from https://www.python.org/downloads/ and run this installer again."
|
|
@@ -7,13 +7,17 @@ echo.
|
|
|
7
7
|
|
|
8
8
|
where py >nul 2>nul
|
|
9
9
|
if %errorlevel%==0 (
|
|
10
|
-
py -3 installers\install.py --target all
|
|
10
|
+
py -3 installers\install.py --target all --desktop-launcher
|
|
11
|
+
rem Turn on the in-app "update available" reminder for this turnkey install (disable later with --disable-update-notify).
|
|
12
|
+
py -3 installers\install.py --enable-update-notify
|
|
11
13
|
goto done
|
|
12
14
|
)
|
|
13
15
|
|
|
14
16
|
where python >nul 2>nul
|
|
15
17
|
if %errorlevel%==0 (
|
|
16
|
-
python installers\install.py --target all
|
|
18
|
+
python installers\install.py --target all --desktop-launcher
|
|
19
|
+
rem Turn on the in-app "update available" reminder for this turnkey install (disable later with --disable-update-notify).
|
|
20
|
+
python installers\install.py --enable-update-notify
|
|
17
21
|
goto done
|
|
18
22
|
)
|
|
19
23
|
|
|
@@ -5,9 +5,12 @@ Write-Host "MedSci Skills Installer for Windows"
|
|
|
5
5
|
Write-Host ""
|
|
6
6
|
|
|
7
7
|
if (Get-Command py -ErrorAction SilentlyContinue) {
|
|
8
|
-
py -3 installers/install.py --target all
|
|
8
|
+
py -3 installers/install.py --target all --desktop-launcher
|
|
9
|
+
# Turn on the in-app "update available" reminder for this turnkey install (disable later with --disable-update-notify).
|
|
10
|
+
try { py -3 installers/install.py --enable-update-notify } catch {}
|
|
9
11
|
} elseif (Get-Command python -ErrorAction SilentlyContinue) {
|
|
10
|
-
python installers/install.py --target all
|
|
12
|
+
python installers/install.py --target all --desktop-launcher
|
|
13
|
+
try { python installers/install.py --enable-update-notify } catch {}
|
|
11
14
|
} else {
|
|
12
15
|
Write-Host "Python was not found."
|
|
13
16
|
Write-Host "Please install Python 3 from https://www.python.org/downloads/ and run this installer again."
|
package/installers/install.py
CHANGED
|
@@ -286,6 +286,19 @@ def main() -> int:
|
|
|
286
286
|
except Exception as exc: # noqa: BLE001
|
|
287
287
|
log(f"\n[updater] could not install the one-click updater ({exc}); updates still work via re-running the installer.", log_lines)
|
|
288
288
|
|
|
289
|
+
# One-time nudge: if the in-app update reminder is not enabled, surface how to turn it on.
|
|
290
|
+
# (The classroom installers enable it automatically; this covers npx / manual installs so a
|
|
291
|
+
# clinician who installed via "install this repo" is told how to get update notices.) Read-only.
|
|
292
|
+
if not args.dry_run:
|
|
293
|
+
try:
|
|
294
|
+
import update # noqa: PLC0415
|
|
295
|
+
if not update.session_hook_enabled(medsci_txn.state_home(), update.default_settings_path()):
|
|
296
|
+
log("\n[update reminders] OFF — Claude Code will not tell you when a new version is out.", log_lines)
|
|
297
|
+
log(" Turn on with: npx medsci-skills install --enable-update-notify", log_lines)
|
|
298
|
+
log(" (or: python3 installers/install.py --enable-update-notify)", log_lines)
|
|
299
|
+
except Exception: # noqa: BLE001 - nudge is best-effort, never block the install
|
|
300
|
+
pass
|
|
301
|
+
|
|
289
302
|
if failures:
|
|
290
303
|
log(f"\nCompleted with errors on: {', '.join(failures)}. Other targets are fully installed.", log_lines)
|
|
291
304
|
log("If this happened during class, send the install log to the instructor.", log_lines)
|
package/installers/update.py
CHANGED
|
@@ -517,6 +517,25 @@ def register_session_hook(home: Path, settings_path: Path) -> str:
|
|
|
517
517
|
return "enabled"
|
|
518
518
|
|
|
519
519
|
|
|
520
|
+
def session_hook_enabled(home: Path, settings_path: Path) -> bool:
|
|
521
|
+
"""Read-only: True iff our SessionStart update-notify hook is currently registered in
|
|
522
|
+
settings.json. Never writes; tolerant of an absent/empty/unreadable settings file (False).
|
|
523
|
+
Used by the installer to decide whether to print the one-time enable-reminders nudge."""
|
|
524
|
+
try:
|
|
525
|
+
if not settings_path.is_file():
|
|
526
|
+
return False
|
|
527
|
+
settings = _load_settings(settings_path)
|
|
528
|
+
if not isinstance(settings, dict):
|
|
529
|
+
return False
|
|
530
|
+
hooks = settings.get("hooks")
|
|
531
|
+
ss = hooks.get("SessionStart") if isinstance(hooks, dict) else None
|
|
532
|
+
if not isinstance(ss, list):
|
|
533
|
+
return False
|
|
534
|
+
return any(_entry_owns_hook(e, home) for e in ss)
|
|
535
|
+
except Exception: # noqa: BLE001 - read-only nudge gate, never block install
|
|
536
|
+
return False
|
|
537
|
+
|
|
538
|
+
|
|
520
539
|
def unregister_session_hook(home: Path, settings_path: Path) -> str:
|
|
521
540
|
"""Opt-out: remove ONLY our SessionStart hook (even if it shares an entry with other hooks),
|
|
522
541
|
preserving everything else; drop emptied containers. Returns 'disabled' or 'not-enabled'."""
|