cryoem-halfmap-qc 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. cryoem_halfmap_qc-0.3.2/LICENSE +21 -0
  2. cryoem_halfmap_qc-0.3.2/PKG-INFO +268 -0
  3. cryoem_halfmap_qc-0.3.2/README.md +237 -0
  4. cryoem_halfmap_qc-0.3.2/cryoem_halfmap_qc.egg-info/PKG-INFO +268 -0
  5. cryoem_halfmap_qc-0.3.2/cryoem_halfmap_qc.egg-info/SOURCES.txt +88 -0
  6. cryoem_halfmap_qc-0.3.2/cryoem_halfmap_qc.egg-info/dependency_links.txt +1 -0
  7. cryoem_halfmap_qc-0.3.2/cryoem_halfmap_qc.egg-info/entry_points.txt +2 -0
  8. cryoem_halfmap_qc-0.3.2/cryoem_halfmap_qc.egg-info/requires.txt +13 -0
  9. cryoem_halfmap_qc-0.3.2/cryoem_halfmap_qc.egg-info/top_level.txt +2 -0
  10. cryoem_halfmap_qc-0.3.2/cryoem_mrc/__init__.py +135 -0
  11. cryoem_halfmap_qc-0.3.2/cryoem_mrc/__main__.py +218 -0
  12. cryoem_halfmap_qc-0.3.2/cryoem_mrc/analysis.py +815 -0
  13. cryoem_halfmap_qc-0.3.2/cryoem_mrc/analysis_driver.py +369 -0
  14. cryoem_halfmap_qc-0.3.2/cryoem_mrc/cli.py +159 -0
  15. cryoem_halfmap_qc-0.3.2/cryoem_mrc/cohort_composition.py +75 -0
  16. cryoem_halfmap_qc-0.3.2/cryoem_mrc/cohort_emdb.py +130 -0
  17. cryoem_halfmap_qc-0.3.2/cryoem_mrc/cohort_labels.py +81 -0
  18. cryoem_halfmap_qc-0.3.2/cryoem_mrc/cohort_pipeline.py +334 -0
  19. cryoem_halfmap_qc-0.3.2/cryoem_mrc/cohort_resolution.py +156 -0
  20. cryoem_halfmap_qc-0.3.2/cryoem_mrc/conformation_coupling.py +284 -0
  21. cryoem_halfmap_qc-0.3.2/cryoem_mrc/conformation_pair.py +780 -0
  22. cryoem_halfmap_qc-0.3.2/cryoem_mrc/density_source.py +49 -0
  23. cryoem_halfmap_qc-0.3.2/cryoem_mrc/feature_ml.py +88 -0
  24. cryoem_halfmap_qc-0.3.2/cryoem_mrc/figure_cleanup.py +87 -0
  25. cryoem_halfmap_qc-0.3.2/cryoem_mrc/guinier_benchmark.py +338 -0
  26. cryoem_halfmap_qc-0.3.2/cryoem_mrc/guinier_sharpening.py +521 -0
  27. cryoem_halfmap_qc-0.3.2/cryoem_mrc/half_map_repro.py +180 -0
  28. cryoem_halfmap_qc-0.3.2/cryoem_mrc/hessian.py +116 -0
  29. cryoem_halfmap_qc-0.3.2/cryoem_mrc/incremental_prediction.py +370 -0
  30. cryoem_halfmap_qc-0.3.2/cryoem_mrc/io.py +166 -0
  31. cryoem_halfmap_qc-0.3.2/cryoem_mrc/local_fsc.py +458 -0
  32. cryoem_halfmap_qc-0.3.2/cryoem_mrc/local_resolution.py +284 -0
  33. cryoem_halfmap_qc-0.3.2/cryoem_mrc/local_resolution_io.py +359 -0
  34. cryoem_halfmap_qc-0.3.2/cryoem_mrc/local_stats.py +235 -0
  35. cryoem_halfmap_qc-0.3.2/cryoem_mrc/map_grid.py +374 -0
  36. cryoem_halfmap_qc-0.3.2/cryoem_mrc/mask_bbox.py +116 -0
  37. cryoem_halfmap_qc-0.3.2/cryoem_mrc/mechanics.py +398 -0
  38. cryoem_halfmap_qc-0.3.2/cryoem_mrc/metric_comparison.py +236 -0
  39. cryoem_halfmap_qc-0.3.2/cryoem_mrc/model_building_export.py +227 -0
  40. cryoem_halfmap_qc-0.3.2/cryoem_mrc/model_map.py +97 -0
  41. cryoem_halfmap_qc-0.3.2/cryoem_mrc/multiscale.py +124 -0
  42. cryoem_halfmap_qc-0.3.2/cryoem_mrc/pipeline.py +196 -0
  43. cryoem_halfmap_qc-0.3.2/cryoem_mrc/placement_decoupling.py +349 -0
  44. cryoem_halfmap_qc-0.3.2/cryoem_mrc/placement_supplement.py +193 -0
  45. cryoem_halfmap_qc-0.3.2/cryoem_mrc/placement_utility.py +1844 -0
  46. cryoem_halfmap_qc-0.3.2/cryoem_mrc/qscore_validation.py +427 -0
  47. cryoem_halfmap_qc-0.3.2/cryoem_mrc/reliability.py +205 -0
  48. cryoem_halfmap_qc-0.3.2/cryoem_mrc/reliability_driver.py +638 -0
  49. cryoem_halfmap_qc-0.3.2/cryoem_mrc/repo_paths.py +245 -0
  50. cryoem_halfmap_qc-0.3.2/cryoem_mrc/rigidity.py +206 -0
  51. cryoem_halfmap_qc-0.3.2/cryoem_mrc/structure_validation.py +1185 -0
  52. cryoem_halfmap_qc-0.3.2/cryoem_mrc/tui.py +240 -0
  53. cryoem_halfmap_qc-0.3.2/cryoem_mrc/tv_curvature.py +332 -0
  54. cryoem_halfmap_qc-0.3.2/cryoem_mrc/visualize.py +203 -0
  55. cryoem_halfmap_qc-0.3.2/cryoem_mrc/volume_slices.py +139 -0
  56. cryoem_halfmap_qc-0.3.2/pyproject.toml +46 -0
  57. cryoem_halfmap_qc-0.3.2/setup.cfg +4 -0
  58. cryoem_halfmap_qc-0.3.2/style/__init__.py +25 -0
  59. cryoem_halfmap_qc-0.3.2/style/nature.py +194 -0
  60. cryoem_halfmap_qc-0.3.2/style/thesis_palette.py +151 -0
  61. cryoem_halfmap_qc-0.3.2/tests/test_blocres_status.py +70 -0
  62. cryoem_halfmap_qc-0.3.2/tests/test_cli.py +46 -0
  63. cryoem_halfmap_qc-0.3.2/tests/test_cohort_emdb.py +44 -0
  64. cryoem_halfmap_qc-0.3.2/tests/test_cohort_gap_audit.py +113 -0
  65. cryoem_halfmap_qc-0.3.2/tests/test_cohort_resolution.py +56 -0
  66. cryoem_halfmap_qc-0.3.2/tests/test_conformation_pair_rmsd.py +138 -0
  67. cryoem_halfmap_qc-0.3.2/tests/test_conformation_pair_summary.py +53 -0
  68. cryoem_halfmap_qc-0.3.2/tests/test_density_source.py +44 -0
  69. cryoem_halfmap_qc-0.3.2/tests/test_guinier_sharpening.py +96 -0
  70. cryoem_halfmap_qc-0.3.2/tests/test_guinier_sharpening_compare.py +36 -0
  71. cryoem_halfmap_qc-0.3.2/tests/test_hessian.py +24 -0
  72. cryoem_halfmap_qc-0.3.2/tests/test_incremental_prediction.py +106 -0
  73. cryoem_halfmap_qc-0.3.2/tests/test_local_fsc.py +127 -0
  74. cryoem_halfmap_qc-0.3.2/tests/test_local_resolution_aggregate.py +93 -0
  75. cryoem_halfmap_qc-0.3.2/tests/test_local_resolution_export.py +107 -0
  76. cryoem_halfmap_qc-0.3.2/tests/test_mask_bbox.py +56 -0
  77. cryoem_halfmap_qc-0.3.2/tests/test_mechanics.py +93 -0
  78. cryoem_halfmap_qc-0.3.2/tests/test_mechanics_tv.py +15 -0
  79. cryoem_halfmap_qc-0.3.2/tests/test_metric_comparison.py +76 -0
  80. cryoem_halfmap_qc-0.3.2/tests/test_model_building_export.py +63 -0
  81. cryoem_halfmap_qc-0.3.2/tests/test_model_placement_audit.py +80 -0
  82. cryoem_halfmap_qc-0.3.2/tests/test_pipeline_embed.py +23 -0
  83. cryoem_halfmap_qc-0.3.2/tests/test_placement_lomo.py +140 -0
  84. cryoem_halfmap_qc-0.3.2/tests/test_placement_supplement.py +35 -0
  85. cryoem_halfmap_qc-0.3.2/tests/test_placement_utility.py +121 -0
  86. cryoem_halfmap_qc-0.3.2/tests/test_qscore_validation.py +43 -0
  87. cryoem_halfmap_qc-0.3.2/tests/test_reliability.py +48 -0
  88. cryoem_halfmap_qc-0.3.2/tests/test_structure_validation_sphere.py +67 -0
  89. cryoem_halfmap_qc-0.3.2/tests/test_thesis_palette.py +38 -0
  90. cryoem_halfmap_qc-0.3.2/tests/test_tv_curvature.py +85 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sarthak Mohanty
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,268 @@
1
+ Metadata-Version: 2.4
2
+ Name: cryoem-halfmap-qc
3
+ Version: 0.3.2
4
+ Summary: Local map reliability from cryo-EM density and half-maps
5
+ Author: Sarthak Mohanty
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/sarthaktexas/cryoem-halfmap-qc
8
+ Project-URL: Zenodo, https://doi.org/10.5281/zenodo.20618526
9
+ Keywords: cryo-em,half-map,reliability,mrc,structural-biology
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: numpy>=1.24
19
+ Requires-Dist: scipy>=1.10
20
+ Requires-Dist: mrcfile>=1.4
21
+ Requires-Dist: matplotlib>=3.7
22
+ Requires-Dist: gemmi>=0.6
23
+ Requires-Dist: seaborn>=0.13
24
+ Requires-Dist: pandas>=2.0
25
+ Requires-Dist: biopython>=1.80
26
+ Requires-Dist: tqdm>=4.60
27
+ Provides-Extra: dev
28
+ Requires-Dist: build; extra == "dev"
29
+ Requires-Dist: twine; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # cryoem-halfmap-qc
33
+
34
+ [![DOI](https://zenodo.org/badge/1262218538.svg)](https://doi.org/10.5281/zenodo.20618526)
35
+
36
+ Python tools for **local map reliability** in cryo-EM reconstructions: density statistics, half-map reproducibility, windowed local FSC (Å), a reproducibility score (H_repro), and build/caution/omit zones.
37
+
38
+ The goal is to test whether inexpensive map features track **half-map cross-correlation** and **local FSC** well enough to guide modeling. This is **not** a claim that density alone defines molecular flexibility.
39
+
40
+ All volumes use NumPy 3D arrays in `(Z, Y, X)` order (section, row, column), consistent with typical `mrcfile` layouts.
41
+
42
+ ---
43
+
44
+ ## Install
45
+
46
+ **PyPI:** not published yet (`pip install cryoem-halfmap-qc` will fail until the first release is uploaded). See [Publishing to PyPI](#publishing-to-pypi) below.
47
+
48
+ Until then, install from GitHub or a local checkout:
49
+
50
+ ```bash
51
+ git clone https://github.com/sarthaktexas/cryoem-halfmap-qc.git
52
+ cd cryoem-halfmap-qc
53
+ pip install -e .
54
+
55
+ # or without cloning:
56
+ pip install "git+https://github.com/sarthaktexas/cryoem-halfmap-qc.git@v0.3.2"
57
+ ```
58
+
59
+ This installs the **`halfmap-qc`** command on your PATH (PyPI package name will be `cryoem-halfmap-qc` once published).
60
+
61
+ **Help & interactive mode:**
62
+
63
+ ```bash
64
+ halfmap-qc # interactive menu (when run in a terminal)
65
+ halfmap-qc help # full command reference
66
+ halfmap-qc --help # argparse summary + examples
67
+ halfmap-qc cohort --help # flags for one subcommand
68
+ halfmap-qc interactive # menu explicitly
69
+ ```
70
+
71
+ **Dependencies:** NumPy, SciPy, mrcfile, Matplotlib, gemmi, seaborn, pandas, Biopython, tqdm.
72
+
73
+ ---
74
+
75
+ ## Data layout
76
+
77
+ Cryo-EM maps are **not** stored in this repository (too large for git). After cloning, create local directories:
78
+
79
+ ```text
80
+ data/emd_<ID>-<label>/ # deposited map + half-maps (.map or .mrc)
81
+ outputs/emd_<ID>/ # pipeline products (created by scripts)
82
+ cohort/manifest.csv # EMDB IDs, relative paths, contours, validation labels
83
+ ```
84
+
85
+ Download deposited and half maps from [EMDB](https://www.ebi.ac.uk/emdb/). Use the depositor-recommended contour for each entry (listed in `cohort/manifest.csv`). See [docs/COHORT.md](docs/COHORT.md) for download status and pipeline progress.
86
+
87
+ ---
88
+
89
+ ## Quick start
90
+
91
+ Run from the project root (where `data/` and `cohort/manifest.csv` live).
92
+
93
+ **Single-map features:**
94
+
95
+ ```bash
96
+ halfmap-qc features path/to/map.mrc --out map_features.npz --float32
97
+ # shorthand (legacy): halfmap-qc path/to/map.mrc --out map_features.npz --float32
98
+ ```
99
+
100
+ **Typical workflow** (features on avg-of-halves; reliability MRCs on deposited primary grid):
101
+
102
+ ```bash
103
+ EMD=49450
104
+ CONTOUR=0.116
105
+ DATA=data/emd_${EMD}-mgtA_e2p+e1
106
+
107
+ halfmap-qc analyze \
108
+ --features "${DATA}/emd_${EMD}_avg_features_t0116.npz" \
109
+ --half1 "${DATA}/emd_${EMD}_half_map_1.map" \
110
+ --half2 "${DATA}/emd_${EMD}_half_map_2.map" \
111
+ --reference "${DATA}/emd_${EMD}.map" \
112
+ --contour "${CONTOUR}" \
113
+ --out-dir "outputs/emd_${EMD}/analysis"
114
+
115
+ halfmap-qc reliability --emd-id "${EMD}" --contour "${CONTOUR}" \
116
+ --features "${DATA}/emd_${EMD}_avg_features_t0116.npz" \
117
+ --halfmap-npz "outputs/emd_${EMD}/analysis/halfmap_metrics.npz"
118
+ ```
119
+
120
+ **Cohort batch** (all active manifest entries with local data):
121
+
122
+ ```bash
123
+ halfmap-qc cohort --pending
124
+ ```
125
+
126
+ **ARC / SLURM** (one map per array task; save a local `*.sbatch` — not in git):
127
+
128
+ ```bash
129
+ # After pip install -e . and rsync data/ + cohort/manifest.csv to $SCRATCH/thesis
130
+ N=$(($(halfmap-qc cohort-ids | wc -l) - 1))
131
+ sbatch --account=wrz135 --array=0-${N} --cpus-per-task=4 --mem=32G --time=00:45:00 \
132
+ --wrap='halfmap-qc cohort --emd-id $(halfmap-qc cohort-ids | sed -n "$((SLURM_ARRAY_TASK_ID+1))p")'
133
+ ```
134
+
135
+ Or save a multi-line script as e.g. `~/halfmap-qc_array.sbatch` (gitignored) and `sbatch --array=0-${N} ~/halfmap-qc_array.sbatch`.
136
+
137
+ ---
138
+
139
+ ## CLI (`halfmap-qc`)
140
+
141
+ | Command | Purpose |
142
+ | --- | --- |
143
+ | *(no args, TTY)* | Interactive menu |
144
+ | `halfmap-qc help` | Full reference + install notes |
145
+ | `halfmap-qc features` | Local density / multiscale features → `.npz` |
146
+ | `halfmap-qc analyze` | Windowed half-map CC + feature correlations |
147
+ | `halfmap-qc reliability` | Reliability score, build zones, MRC export |
148
+ | `halfmap-qc cohort` | Batch pipeline from `cohort/manifest.csv` |
149
+ | `halfmap-qc cohort-ids` | Print EMDB IDs (for SLURM array jobs) |
150
+ | `halfmap-qc interactive` | Interactive menu (same as bare `halfmap-qc`) |
151
+
152
+ Legacy: `python -m cryoem_mrc` still works (same as `halfmap-qc features`).
153
+
154
+ ## Publishing to PyPI
155
+
156
+ One-time setup:
157
+
158
+ 1. Create an account at [pypi.org](https://pypi.org/account/register/) (and optionally [test.pypi.org](https://test.pypi.org/) for a dry run).
159
+ 2. On PyPI → **Your projects** → **Add new project** → name it `cryoem-halfmap-qc` (or claim it when uploading).
160
+ 3. On PyPI → **Account settings** → **Publishing** → **Add a new pending publisher**:
161
+ - PyPI project: `cryoem-halfmap-qc`
162
+ - Owner: `sarthaktexas` (your GitHub user/org)
163
+ - Repository: `cryoem-halfmap-qc`
164
+ - Workflow: `publish.yml`
165
+ - Environment: (leave blank unless you use one)
166
+
167
+ Release:
168
+
169
+ ```bash
170
+ # bump version in pyproject.toml and cryoem_mrc/__init__.py first, then:
171
+ git add pyproject.toml cryoem_mrc/__init__.py
172
+ git commit -m "Release v0.3.2"
173
+ git tag v0.3.2
174
+ git push origin main --tags
175
+ ```
176
+
177
+ On GitHub → **Releases** → **Draft a new release** → choose tag `v0.3.2` → **Publish release**. The [`.github/workflows/publish.yml`](.github/workflows/publish.yml) workflow builds the wheel and uploads to PyPI.
178
+
179
+ Test install after publish:
180
+
181
+ ```bash
182
+ pip install cryoem-halfmap-qc
183
+ halfmap-qc --version
184
+ ```
185
+
186
+ Manual upload (without GitHub Actions):
187
+
188
+ ```bash
189
+ pip install build twine
190
+ python -m build
191
+ twine upload dist/*
192
+ ```
193
+
194
+ ## Scripts (thesis / optional)
195
+
196
+ Thesis figure runners (`scripts/rerun_all_figures.py`, `scripts/run_cohort_summary_figures.py`, Figma export scripts, etc.) and `cryoem_mrc/thesis_figures.py` are **local-only** (gitignored), like `figma-plugins/`, so a fresh clone does not include them. To run them, work in a checkout on a machine that already has those files, or keep a local copy from before they were untracked.
197
+
198
+ ---
199
+
200
+ ## Python API (high level)
201
+
202
+ ```python
203
+ import numpy as np
204
+ from cryoem_mrc import load_full_and_half_maps, run_pipeline, half_map_local_metrics
205
+ from cryoem_mrc.reliability import compute_reliability_maps, classify_build_zones
206
+
207
+ bundle = load_full_and_half_maps(
208
+ "full.mrc", "half1.mrc", "half2.mrc", dtype=np.float32, resample_if_needed=True
209
+ )
210
+ metrics = half_map_local_metrics(bundle.half1, bundle.half2, window=5)
211
+ # metrics["windowed_halfmap_correlation"], etc.
212
+
213
+ features = run_pipeline("map.mrc", use_float32=True)
214
+ reliability = compute_reliability_maps(
215
+ bundle.half1, bundle.half2,
216
+ density_normalized=features["density_normalized"],
217
+ window=5,
218
+ )
219
+ zones = classify_build_zones(reliability["reliability_score"])
220
+ ```
221
+
222
+ **Package modules:** `io`, `map_grid`, `local_stats`, `multiscale`, `half_map_repro`, `local_fsc`, `mechanics`, `reliability`, `analysis`, `structure_validation`. Path helpers: `cryoem_mrc/repo_paths.py`.
223
+
224
+ ---
225
+
226
+ ## Methods summary
227
+
228
+ - **Windowed half-map correlation** is the fast internal reproducibility target for feature validation; **local FSC resolution (Å)** is the field-standard reference.
229
+ - **Local FSC** is computed in-repo (`cryoem_mrc.local_fsc`); external BlocRes / ResMap / MonoRes maps are not loaded.
230
+ - **H_repro** is the windowed gradient-constraint map *V* (legacy export name; ranked as **reliability_score**); **reliability_score** is an in-mask percentile used for build/caution/omit terciles. Resolvability gating uses windowed half-map CC or local FSC, not a separate disagreement map.
231
+ - **Local variance** is often the strongest single feature predictor of windowed half-map correlation.
232
+
233
+ **Thesis prose:** full narrative draft in [docs/THESIS_NARRATIVE.md](docs/THESIS_NARRATIVE.md). Writing guide and defense notes in [docs/THESIS_AND_PUBLICATION.md](docs/THESIS_AND_PUBLICATION.md).
234
+
235
+ ---
236
+
237
+ ## Tests
238
+
239
+ ```bash
240
+ python -m unittest discover -s tests -v
241
+ ```
242
+
243
+ ---
244
+
245
+ ## Citation
246
+
247
+ **Before the manuscript is published**, cite the software with the Zenodo concept DOI (resolves to the latest release; pin `v0.3.2` or a commit hash for exact reproducibility):
248
+
249
+ ```bibtex
250
+ @software{mohanty2026cryoem_halfmap_qc,
251
+ author = {Mohanty, Sarthak},
252
+ title = {cryoem-halfmap-qc: local map reliability from cryo-EM density and half-maps},
253
+ year = {2026},
254
+ doi = {10.5281/zenodo.20618526},
255
+ url = {https://doi.org/10.5281/zenodo.20618526},
256
+ version = {0.3.2}
257
+ }
258
+ ```
259
+
260
+ GitHub also reads [CITATION.cff](CITATION.cff) for the **Cite this repository** button.
261
+
262
+ **After publication**, cite the paper as the primary reference. Also cite this Zenodo archive when you need the exact pipeline version used in the work.
263
+
264
+ When the manuscript exists, add a `preferred-citation` block to `CITATION.cff` (template included there) and drop the BibTeX for the article into this section.
265
+
266
+ ## License
267
+
268
+ MIT License. See [LICENSE](LICENSE).
@@ -0,0 +1,237 @@
1
+ # cryoem-halfmap-qc
2
+
3
+ [![DOI](https://zenodo.org/badge/1262218538.svg)](https://doi.org/10.5281/zenodo.20618526)
4
+
5
+ Python tools for **local map reliability** in cryo-EM reconstructions: density statistics, half-map reproducibility, windowed local FSC (Å), a reproducibility score (H_repro), and build/caution/omit zones.
6
+
7
+ The goal is to test whether inexpensive map features track **half-map cross-correlation** and **local FSC** well enough to guide modeling. This is **not** a claim that density alone defines molecular flexibility.
8
+
9
+ All volumes use NumPy 3D arrays in `(Z, Y, X)` order (section, row, column), consistent with typical `mrcfile` layouts.
10
+
11
+ ---
12
+
13
+ ## Install
14
+
15
+ **PyPI:** not published yet (`pip install cryoem-halfmap-qc` will fail until the first release is uploaded). See [Publishing to PyPI](#publishing-to-pypi) below.
16
+
17
+ Until then, install from GitHub or a local checkout:
18
+
19
+ ```bash
20
+ git clone https://github.com/sarthaktexas/cryoem-halfmap-qc.git
21
+ cd cryoem-halfmap-qc
22
+ pip install -e .
23
+
24
+ # or without cloning:
25
+ pip install "git+https://github.com/sarthaktexas/cryoem-halfmap-qc.git@v0.3.2"
26
+ ```
27
+
28
+ This installs the **`halfmap-qc`** command on your PATH (PyPI package name will be `cryoem-halfmap-qc` once published).
29
+
30
+ **Help & interactive mode:**
31
+
32
+ ```bash
33
+ halfmap-qc # interactive menu (when run in a terminal)
34
+ halfmap-qc help # full command reference
35
+ halfmap-qc --help # argparse summary + examples
36
+ halfmap-qc cohort --help # flags for one subcommand
37
+ halfmap-qc interactive # menu explicitly
38
+ ```
39
+
40
+ **Dependencies:** NumPy, SciPy, mrcfile, Matplotlib, gemmi, seaborn, pandas, Biopython, tqdm.
41
+
42
+ ---
43
+
44
+ ## Data layout
45
+
46
+ Cryo-EM maps are **not** stored in this repository (too large for git). After cloning, create local directories:
47
+
48
+ ```text
49
+ data/emd_<ID>-<label>/ # deposited map + half-maps (.map or .mrc)
50
+ outputs/emd_<ID>/ # pipeline products (created by scripts)
51
+ cohort/manifest.csv # EMDB IDs, relative paths, contours, validation labels
52
+ ```
53
+
54
+ Download deposited and half maps from [EMDB](https://www.ebi.ac.uk/emdb/). Use the depositor-recommended contour for each entry (listed in `cohort/manifest.csv`). See [docs/COHORT.md](docs/COHORT.md) for download status and pipeline progress.
55
+
56
+ ---
57
+
58
+ ## Quick start
59
+
60
+ Run from the project root (where `data/` and `cohort/manifest.csv` live).
61
+
62
+ **Single-map features:**
63
+
64
+ ```bash
65
+ halfmap-qc features path/to/map.mrc --out map_features.npz --float32
66
+ # shorthand (legacy): halfmap-qc path/to/map.mrc --out map_features.npz --float32
67
+ ```
68
+
69
+ **Typical workflow** (features on avg-of-halves; reliability MRCs on deposited primary grid):
70
+
71
+ ```bash
72
+ EMD=49450
73
+ CONTOUR=0.116
74
+ DATA=data/emd_${EMD}-mgtA_e2p+e1
75
+
76
+ halfmap-qc analyze \
77
+ --features "${DATA}/emd_${EMD}_avg_features_t0116.npz" \
78
+ --half1 "${DATA}/emd_${EMD}_half_map_1.map" \
79
+ --half2 "${DATA}/emd_${EMD}_half_map_2.map" \
80
+ --reference "${DATA}/emd_${EMD}.map" \
81
+ --contour "${CONTOUR}" \
82
+ --out-dir "outputs/emd_${EMD}/analysis"
83
+
84
+ halfmap-qc reliability --emd-id "${EMD}" --contour "${CONTOUR}" \
85
+ --features "${DATA}/emd_${EMD}_avg_features_t0116.npz" \
86
+ --halfmap-npz "outputs/emd_${EMD}/analysis/halfmap_metrics.npz"
87
+ ```
88
+
89
+ **Cohort batch** (all active manifest entries with local data):
90
+
91
+ ```bash
92
+ halfmap-qc cohort --pending
93
+ ```
94
+
95
+ **ARC / SLURM** (one map per array task; save a local `*.sbatch` — not in git):
96
+
97
+ ```bash
98
+ # After pip install -e . and rsync data/ + cohort/manifest.csv to $SCRATCH/thesis
99
+ N=$(($(halfmap-qc cohort-ids | wc -l) - 1))
100
+ sbatch --account=wrz135 --array=0-${N} --cpus-per-task=4 --mem=32G --time=00:45:00 \
101
+ --wrap='halfmap-qc cohort --emd-id $(halfmap-qc cohort-ids | sed -n "$((SLURM_ARRAY_TASK_ID+1))p")'
102
+ ```
103
+
104
+ Or save a multi-line script as e.g. `~/halfmap-qc_array.sbatch` (gitignored) and `sbatch --array=0-${N} ~/halfmap-qc_array.sbatch`.
105
+
106
+ ---
107
+
108
+ ## CLI (`halfmap-qc`)
109
+
110
+ | Command | Purpose |
111
+ | --- | --- |
112
+ | *(no args, TTY)* | Interactive menu |
113
+ | `halfmap-qc help` | Full reference + install notes |
114
+ | `halfmap-qc features` | Local density / multiscale features → `.npz` |
115
+ | `halfmap-qc analyze` | Windowed half-map CC + feature correlations |
116
+ | `halfmap-qc reliability` | Reliability score, build zones, MRC export |
117
+ | `halfmap-qc cohort` | Batch pipeline from `cohort/manifest.csv` |
118
+ | `halfmap-qc cohort-ids` | Print EMDB IDs (for SLURM array jobs) |
119
+ | `halfmap-qc interactive` | Interactive menu (same as bare `halfmap-qc`) |
120
+
121
+ Legacy: `python -m cryoem_mrc` still works (same as `halfmap-qc features`).
122
+
123
+ ## Publishing to PyPI
124
+
125
+ One-time setup:
126
+
127
+ 1. Create an account at [pypi.org](https://pypi.org/account/register/) (and optionally [test.pypi.org](https://test.pypi.org/) for a dry run).
128
+ 2. On PyPI → **Your projects** → **Add new project** → name it `cryoem-halfmap-qc` (or claim it when uploading).
129
+ 3. On PyPI → **Account settings** → **Publishing** → **Add a new pending publisher**:
130
+ - PyPI project: `cryoem-halfmap-qc`
131
+ - Owner: `sarthaktexas` (your GitHub user/org)
132
+ - Repository: `cryoem-halfmap-qc`
133
+ - Workflow: `publish.yml`
134
+ - Environment: (leave blank unless you use one)
135
+
136
+ Release:
137
+
138
+ ```bash
139
+ # bump version in pyproject.toml and cryoem_mrc/__init__.py first, then:
140
+ git add pyproject.toml cryoem_mrc/__init__.py
141
+ git commit -m "Release v0.3.2"
142
+ git tag v0.3.2
143
+ git push origin main --tags
144
+ ```
145
+
146
+ On GitHub → **Releases** → **Draft a new release** → choose tag `v0.3.2` → **Publish release**. The [`.github/workflows/publish.yml`](.github/workflows/publish.yml) workflow builds the wheel and uploads to PyPI.
147
+
148
+ Test install after publish:
149
+
150
+ ```bash
151
+ pip install cryoem-halfmap-qc
152
+ halfmap-qc --version
153
+ ```
154
+
155
+ Manual upload (without GitHub Actions):
156
+
157
+ ```bash
158
+ pip install build twine
159
+ python -m build
160
+ twine upload dist/*
161
+ ```
162
+
163
+ ## Scripts (thesis / optional)
164
+
165
+ Thesis figure runners (`scripts/rerun_all_figures.py`, `scripts/run_cohort_summary_figures.py`, Figma export scripts, etc.) and `cryoem_mrc/thesis_figures.py` are **local-only** (gitignored), like `figma-plugins/`, so a fresh clone does not include them. To run them, work in a checkout on a machine that already has those files, or keep a local copy from before they were untracked.
166
+
167
+ ---
168
+
169
+ ## Python API (high level)
170
+
171
+ ```python
172
+ import numpy as np
173
+ from cryoem_mrc import load_full_and_half_maps, run_pipeline, half_map_local_metrics
174
+ from cryoem_mrc.reliability import compute_reliability_maps, classify_build_zones
175
+
176
+ bundle = load_full_and_half_maps(
177
+ "full.mrc", "half1.mrc", "half2.mrc", dtype=np.float32, resample_if_needed=True
178
+ )
179
+ metrics = half_map_local_metrics(bundle.half1, bundle.half2, window=5)
180
+ # metrics["windowed_halfmap_correlation"], etc.
181
+
182
+ features = run_pipeline("map.mrc", use_float32=True)
183
+ reliability = compute_reliability_maps(
184
+ bundle.half1, bundle.half2,
185
+ density_normalized=features["density_normalized"],
186
+ window=5,
187
+ )
188
+ zones = classify_build_zones(reliability["reliability_score"])
189
+ ```
190
+
191
+ **Package modules:** `io`, `map_grid`, `local_stats`, `multiscale`, `half_map_repro`, `local_fsc`, `mechanics`, `reliability`, `analysis`, `structure_validation`. Path helpers: `cryoem_mrc/repo_paths.py`.
192
+
193
+ ---
194
+
195
+ ## Methods summary
196
+
197
+ - **Windowed half-map correlation** is the fast internal reproducibility target for feature validation; **local FSC resolution (Å)** is the field-standard reference.
198
+ - **Local FSC** is computed in-repo (`cryoem_mrc.local_fsc`); external BlocRes / ResMap / MonoRes maps are not loaded.
199
+ - **H_repro** is the windowed gradient-constraint map *V* (legacy export name; ranked as **reliability_score**); **reliability_score** is an in-mask percentile used for build/caution/omit terciles. Resolvability gating uses windowed half-map CC or local FSC, not a separate disagreement map.
200
+ - **Local variance** is often the strongest single feature predictor of windowed half-map correlation.
201
+
202
+ **Thesis prose:** full narrative draft in [docs/THESIS_NARRATIVE.md](docs/THESIS_NARRATIVE.md). Writing guide and defense notes in [docs/THESIS_AND_PUBLICATION.md](docs/THESIS_AND_PUBLICATION.md).
203
+
204
+ ---
205
+
206
+ ## Tests
207
+
208
+ ```bash
209
+ python -m unittest discover -s tests -v
210
+ ```
211
+
212
+ ---
213
+
214
+ ## Citation
215
+
216
+ **Before the manuscript is published**, cite the software with the Zenodo concept DOI (resolves to the latest release; pin `v0.3.2` or a commit hash for exact reproducibility):
217
+
218
+ ```bibtex
219
+ @software{mohanty2026cryoem_halfmap_qc,
220
+ author = {Mohanty, Sarthak},
221
+ title = {cryoem-halfmap-qc: local map reliability from cryo-EM density and half-maps},
222
+ year = {2026},
223
+ doi = {10.5281/zenodo.20618526},
224
+ url = {https://doi.org/10.5281/zenodo.20618526},
225
+ version = {0.3.2}
226
+ }
227
+ ```
228
+
229
+ GitHub also reads [CITATION.cff](CITATION.cff) for the **Cite this repository** button.
230
+
231
+ **After publication**, cite the paper as the primary reference. Also cite this Zenodo archive when you need the exact pipeline version used in the work.
232
+
233
+ When the manuscript exists, add a `preferred-citation` block to `CITATION.cff` (template included there) and drop the BibTeX for the article into this section.
234
+
235
+ ## License
236
+
237
+ MIT License. See [LICENSE](LICENSE).