pycmplot 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {pycmplot-0.2.3/pycmplot.egg-info → pycmplot-0.2.5}/PKG-INFO +17 -23
  2. {pycmplot-0.2.3 → pycmplot-0.2.5}/README.md +11 -22
  3. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/__init__.py +7 -2
  4. pycmplot-0.2.5/pycmplot/__main__.py +6 -0
  5. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/_core.py +43 -38
  6. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/annotation.py +26 -38
  7. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/cli.py +143 -226
  8. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/constants.py +1 -1
  9. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/data/hg19ToHg38.over.chain +0 -0
  10. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/io.py +170 -123
  11. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/liftover.py +33 -8
  12. pycmplot-0.2.5/pycmplot/plotting/__init__.py +31 -0
  13. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/plotting/circular.py +19 -14
  14. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/plotting/linear.py +9 -9
  15. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/plotting/qq.py +50 -3
  16. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/resources.py +26 -31
  17. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/stats.py +6 -6
  18. {pycmplot-0.2.3 → pycmplot-0.2.5/pycmplot.egg-info}/PKG-INFO +17 -23
  19. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot.egg-info/SOURCES.txt +3 -10
  20. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot.egg-info/top_level.txt +1 -1
  21. {pycmplot-0.2.3 → pycmplot-0.2.5}/pyproject.toml +13 -1
  22. {pycmplot-0.2.3 → pycmplot-0.2.5}/setup.cfg +1 -1
  23. pycmplot-0.2.3/docs/conf.py +0 -91
  24. pycmplot-0.2.3/pycmplot_docs/docs/conf.py +0 -91
  25. pycmplot-0.2.3/pycmplot_docs/docstrings_annotation.py +0 -289
  26. pycmplot-0.2.3/pycmplot_docs/docstrings_core_cli.py +0 -347
  27. pycmplot-0.2.3/pycmplot_docs/docstrings_io.py +0 -468
  28. pycmplot-0.2.3/pycmplot_docs/docstrings_liftover.py +0 -156
  29. pycmplot-0.2.3/pycmplot_docs/docstrings_plotting.py +0 -587
  30. pycmplot-0.2.3/pycmplot_docs/docstrings_resources_constants.py +0 -170
  31. pycmplot-0.2.3/pycmplot_docs/docstrings_stats.py +0 -135
  32. {pycmplot-0.2.3 → pycmplot-0.2.5}/LICENSE +0 -0
  33. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
  34. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
  35. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot.egg-info/dependency_links.txt +0 -0
  36. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot.egg-info/entry_points.txt +0 -0
  37. {pycmplot-0.2.3 → pycmplot-0.2.5}/pycmplot.egg-info/requires.txt +0 -0
  38. {pycmplot-0.2.3 → pycmplot-0.2.5}/setup.py +0 -0
@@ -1,10 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
7
7
  License-Expression: CC-BY-NC-SA-4.0
8
+ Project-URL: Homepage, https://github.com/esohkevin/pycmplot
9
+ Project-URL: Issues, https://github.com/esohkevin/pycmplot/issues
10
+ Project-URL: Docs, https://pycmplot.readthedocs.io/en/latest/
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
8
13
  Requires-Python: >=3.9
9
14
  Description-Content-Type: text/markdown
10
15
  License-File: LICENSE
@@ -101,6 +106,9 @@ pip install pycmplot
101
106
  ```bash
102
107
  git clone https://github.com/esohkevin/pycmplot.git
103
108
 
109
+ # or, to get the most recent updates, clone the development branch:
110
+ # git clone -b dev https://github.com/esohkevin/pycmplot.git
111
+
104
112
  cd pycmplot
105
113
 
106
114
  pip install -e .
@@ -207,26 +215,12 @@ A demonstration of how to use the python API is provided in this notebook: https
207
215
 
208
216
  ---
209
217
 
210
- ## Package structure
218
+ ## Contributing
219
+
220
+ See the contributing guide: https://github.com/esohkevin/pycmplot?tab=contributing-ov-file
221
+
222
+
223
+ ## Contributors
224
+
225
+ - [Kevin Esoh](https://github.com/esohkevin)
211
226
 
212
- ```
213
- pycmplot/
214
- ├── pyproject.toml
215
- ├── setup.py
216
- ├── setup.cfg
217
- ├── README.md
218
- └── pycmplot/
219
- ├── __init__.py # public API exports
220
- ├── __main__.py # python -m pycmplot
221
- ├── _core.py # main() orchestration
222
- ├── cli.py # argparse definitions
223
- ├── constants.py # chromosome lengths, biotype weights
224
- ├── resources.py # external resource path config
225
- ├── io.py # sumstat loading, delimiter detection
226
- ├── stats.py # get_lead_snps, get_highlight_snps
227
- ├── liftover.py # lazy hg19→hg38 liftover
228
- ├── annotation.py # nearest-gene annotation, hits table
229
- └── plotting/
230
- ├── __init__.py
231
- ├── linear.py # plot_linear
232
- └── circular.py # plot_circular, compute_track_radii_dict
@@ -75,6 +75,9 @@ pip install pycmplot
75
75
  ```bash
76
76
  git clone https://github.com/esohkevin/pycmplot.git
77
77
 
78
+ # or, to get the most recent updates, clone the development branch:
79
+ # git clone -b dev https://github.com/esohkevin/pycmplot.git
80
+
78
81
  cd pycmplot
79
82
 
80
83
  pip install -e .
@@ -181,26 +184,12 @@ A demonstration of how to use the python API is provided in this notebook: https
181
184
 
182
185
  ---
183
186
 
184
- ## Package structure
187
+ ## Contributing
188
+
189
+ See the contributing guide: https://github.com/esohkevin/pycmplot?tab=contributing-ov-file
190
+
191
+
192
+ ## Contributors
193
+
194
+ - [Kevin Esoh](https://github.com/esohkevin)
185
195
 
186
- ```
187
- pycmplot/
188
- ├── pyproject.toml
189
- ├── setup.py
190
- ├── setup.cfg
191
- ├── README.md
192
- └── pycmplot/
193
- ├── __init__.py # public API exports
194
- ├── __main__.py # python -m pycmplot
195
- ├── _core.py # main() orchestration
196
- ├── cli.py # argparse definitions
197
- ├── constants.py # chromosome lengths, biotype weights
198
- ├── resources.py # external resource path config
199
- ├── io.py # sumstat loading, delimiter detection
200
- ├── stats.py # get_lead_snps, get_highlight_snps
201
- ├── liftover.py # lazy hg19→hg38 liftover
202
- ├── annotation.py # nearest-gene annotation, hits table
203
- └── plotting/
204
- ├── __init__.py
205
- ├── linear.py # plot_linear
206
- └── circular.py # plot_circular, compute_track_radii_dict
@@ -12,7 +12,7 @@ Command-line::
12
12
  Python API::
13
13
 
14
14
  from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
15
- from pycmplot.plotting import plot_linear, plot_circular
15
+ from pycmplot.plotting import plot_linear, plot_circular, plot_qq_single, plot_qq_separate, plot_qq_overlay, plot_qq_combined
16
16
  from pycmplot.stats import get_lead_snps
17
17
  from pycmplot.annotation import get_hits_summary_table
18
18
 
@@ -22,6 +22,7 @@ Public surface
22
22
 
23
23
  from pycmplot.plotting.linear import plot_linear
24
24
  from pycmplot.plotting.circular import plot_circular, compute_track_radii_dict
25
+ from pycmplot.plotting.qq import plot_qq_single, plot_qq_separate, plot_qq_overlay, plot_qq_combined
25
26
  from pycmplot.stats import get_lead_snps, get_highlight_snps
26
27
  from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
27
28
  from pycmplot.annotation import get_hits_summary_table
@@ -31,6 +32,10 @@ from pycmplot.resources import ResourceConfig
31
32
  __all__ = [
32
33
  "plot_linear",
33
34
  "plot_circular",
35
+ "plot_qq_single",
36
+ "plot_qq_separate",
37
+ "plot_qq_overlay",
38
+ "plot_qq_combined",
34
39
  "compute_track_radii_dict",
35
40
  "get_lead_snps",
36
41
  "get_highlight_snps",
@@ -42,4 +47,4 @@ __all__ = [
42
47
  "ResourceConfig",
43
48
  ]
44
49
 
45
- __version__ = "0.2.3"
50
+ __version__ = "0.2.5"
@@ -0,0 +1,6 @@
1
+ """Entry point for ``python -m pycmplot`` invocation."""
2
+
3
+ from pycmplot._core import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -1,62 +1,62 @@
1
- from __future__ import annotations
2
-
3
- CORE_MODULE = """
1
+ """
4
2
  pycmplot._core
5
3
  ==============
6
4
 
7
5
  Main entry point that orchestrates CLI argument parsing, data loading, and
8
6
  plot dispatch. This module is intentionally thin: it delegates all heavy
9
- work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`, and
10
- :mod:`pycmplot.plotting.circular`.
7
+ work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`,
8
+ :mod:`pycmplot.plotting.circular`, and :mod:`pycmplot.plotting.qq`.
11
9
 
12
10
  All imports are deferred inside :func:`main` so that
13
11
  ``import pycmplot`` remains fast regardless of the size of the dependency
14
12
  tree.
15
13
  """
16
14
 
15
+ from __future__ import annotations
16
+
17
17
  import logging
18
18
  import warnings
19
+ import sys
19
20
 
20
21
  # Suppress noisy font-manager warnings before any matplotlib import
21
22
  logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
22
23
  warnings.filterwarnings("ignore")
23
24
 
24
- logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
25
+ logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s", stream=sys.stdout)
25
26
  logger = logging.getLogger(__name__)
26
27
 
27
28
 
28
29
  def main() -> None:
29
- MAIN = """Orchestrate the full pycmplot pipeline from the command line.
30
+ """Orchestrate the full pycmplot pipeline from the command line.
30
31
 
31
32
  This function is registered as the ``pycmplot`` console-script entry point
32
33
  in ``pyproject.toml`` / ``setup.cfg``. It performs the following steps in
33
34
  order:
34
35
 
35
36
  1. **Parse CLI arguments** via :func:`~pycmplot.cli.get_arguments`.
36
- 2. **Parse comma-separated inputs** (files, labels, colours, track heights)
37
- into Python lists via
38
- :func:`~pycmplot.io.strip_comma_separated_input_streams`.
37
+ 2. **Parse comma-separated inputs** (files, labels, colours, track heights,
38
+ builds) into Python lists via
39
+ :func:`~pycmplot.io.strip_comma_separated_input_streams`.
39
40
  3. **Construct output paths** (plot image and locus summary table TSV) via
40
- :func:`~pycmplot.io.get_output_paths`.
41
+ :func:`~pycmplot.io.get_output_paths`.
41
42
  4. **Resolve column names** for every input file via
42
- :func:`~pycmplot.io.prep_pycmplot_input_info`.
43
+ :func:`~pycmplot.io.prep_pycmplot_input_info`.
43
44
  5. **Load data** — reads summary statistics, normalises chromosome names,
44
- runs hg19 → hg38 liftover if needed, extracts lead SNPs, generates the
45
- hits summary table, and computes merged Circos sector sizes via
46
- :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`.
47
- 6. **Dispatch plotting** — calls
48
- :func:`~pycmplot.plotting.circular.plot_circular` when ``--mode cm``,
49
- or :func:`~pycmplot.plotting.linear.plot_linear` otherwise.
50
-
51
- Parameters
52
- ----------
53
- None
54
- All input is taken from ``sys.argv`` via :mod:`argparse`.
45
+ runs hg19 → hg38 liftover if needed, extracts lead SNPs, generates the
46
+ hits summary table, and computes merged Circos sector sizes via
47
+ :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`.
48
+ 6. **Dispatch Manhattan plot** — calls
49
+ :func:`~pycmplot.plotting.circular.plot_circular` when ``--mode cm``,
50
+ or :func:`~pycmplot.plotting.linear.plot_linear` otherwise.
51
+ 7. **Optional QQ plot** — when ``--qq_plot`` is set, dispatches to one of
52
+ :func:`~pycmplot.plotting.qq.plot_qq_combined` (default),
53
+ :func:`~pycmplot.plotting.qq.plot_qq_separate` (``--qq_separate``), or
54
+ :func:`~pycmplot.plotting.qq.plot_qq_overlay` (``--qq_overlay``).
55
55
 
56
56
  Returns
57
57
  -------
58
58
  None
59
- Saves the plot image and locus summary table to the directory
59
+ Saves the plot image(s) and locus summary table to the directory
60
60
  specified by ``--output_dir``.
61
61
 
62
62
  Raises
@@ -194,8 +194,8 @@ def main() -> None:
194
194
  pos = pos_arg,
195
195
  snp = snp_arg,
196
196
  pcol = pcol_arg,
197
- buildc = buildc_arg,
198
- build = builds
197
+ build_column = buildc_arg,
198
+ build_list = builds
199
199
  )
200
200
 
201
201
  # ------------------------------------------------------------------
@@ -206,38 +206,42 @@ def main() -> None:
206
206
  # ------------------------------------------------------------------
207
207
  # Load data, compute sectors, get hits table
208
208
  # ------------------------------------------------------------------
209
- (
210
- merged_assoc_sector_sizes,
211
- sumstats_loaded,
212
- hits_table,
213
- signif_lines,
214
- pval_dict,
215
- ) = get_sumstats_and_merged_sector_list(
209
+ pycmplot_dict = get_sumstats_and_merged_sector_list(
216
210
  sum_stats=sum_stats,
217
211
  labels=labels,
218
212
  trim_pval=trim_pval,
219
213
  logp=logp,
220
214
  file_info=sumstats_hdr_dic,
221
215
  sort_tracks=sort_track,
222
- table_out=table_out,
216
+ table_out=plt_base,
223
217
  signif_threshold=signif_threshold,
224
218
  signif_line=signif_line,
225
219
  suggest_threshold=suggest_threshold,
226
220
  resources=resources,
227
221
  )
228
222
 
223
+ merged_assoc_sector_sizes = pycmplot_dict["sectors"]
224
+ sumstats_loaded = pycmplot_dict["dfs"]
225
+ hits_table = pycmplot_dict["annot"]
226
+ signif_lines = pycmplot_dict["lines"]
227
+ pval_dict = pycmplot_dict["pvals"]
228
+
229
229
  # ------------------------------------------------------------------
230
230
  # ANNOTATE BY
231
231
  # ------------------------------------------------------------------
232
232
  label_col = 'SNP'
233
- if annotate:
233
+ if annotate and not hits_table.empty:
234
234
  if str(annotate).upper() == "GENE" and 'top_gene' in hits_table.columns:
235
235
  label_col = 'top_gene'
236
- elif label_col in hits_table.columns:
236
+ elif annotate in hits_table.columns:
237
237
  label_col = annotate
238
-
238
+ else:
239
+ logger.warning(
240
+ "Annotation column '%s' not found in hits table; "
241
+ "falling back to 'SNP'.", annotate,
242
+ )
239
243
 
240
- logger.info(f"Anotate by: {label_col}")
244
+ logger.info("Annotate by: %s", label_col)
241
245
 
242
246
  # ------------------------------------------------------------------
243
247
  # CIRCULAR MANHATTAN
@@ -316,6 +320,7 @@ def main() -> None:
316
320
  if qq_separate:
317
321
  plot_qq_separate(
318
322
  pval_dict=pval_dict,
323
+ base_name=plot_title,
319
324
  thin=qq_thin,
320
325
  thin_below=thin_below,
321
326
  max_points=qq_max_points,
@@ -1,6 +1,4 @@
1
- from __future__ import annotations
2
-
3
- MODULE_DOCSTRING = """
1
+ """
4
2
  pycmplot.annotation
5
3
  ====================
6
4
 
@@ -22,6 +20,8 @@ paths can be supplied via the ``PYCMPLOT_GENEINFO_HG38`` /
22
20
  ``PYCMPLOT_GENEINFO_HG19`` environment variables.
23
21
  """
24
22
 
23
+ from __future__ import annotations
24
+
25
25
  import bisect
26
26
  import logging
27
27
  from typing import Optional
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
41
41
  # ---------------------------------------------------------------------------
42
42
 
43
43
  def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
44
- BUILD_GENES_DICT = """Build a chromosome-keyed interval dictionary with sorted start positions.
44
+ """Build a chromosome-keyed interval dictionary with sorted start positions.
45
45
 
46
46
  Pre-processes the gene reference DataFrame into a structure that supports
47
47
  efficient O(log N) binary-search lookup of genes near a query position.
@@ -98,7 +98,7 @@ def _annotate_variant(
98
98
  window: int = 500_000,
99
99
  promoter_window: int = 2_000,
100
100
  ) -> dict:
101
- ANNOTATE_VARIANT = """Return strand-aware nearest-gene annotation for a single variant.
101
+ """Return strand-aware nearest-gene annotation for a single variant.
102
102
 
103
103
  Searches the pre-built *genes_dict* within *window* bp of *pos* on
104
104
  *chrom*. Reports the nearest upstream and downstream genes (relative to
@@ -238,8 +238,7 @@ def _annotate_and_prioritize_variant(
238
238
  promoter_window: int = 2_000,
239
239
  biotype_weights: Optional[dict] = None,
240
240
  ) -> Optional[dict]:
241
- ANNOTATE_PRIORITIZE = """Score and rank candidate genes for a single variant using a composite
242
- priority metric.
241
+ """Score and rank candidate genes for a single variant using a composite priority metric.
243
242
 
244
243
  Builds a candidate gene set within *window* bp of *pos* on *chrom*, then
245
244
  scores each candidate on four additive components:
@@ -386,7 +385,7 @@ def _annotate_and_prioritize_variant(
386
385
  # ---------------------------------------------------------------------------
387
386
 
388
387
  def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
389
- CLUMP_BY_DISTANCE = """Reduce a lead-SNP table to one representative SNP per locus.
388
+ """Reduce a lead-SNP table to one representative SNP per locus.
390
389
 
391
390
  Applies greedy distance-based clumping within each chromosome group,
392
391
  starting from the most significant SNP (lowest ``P`` or highest ``logP``).
@@ -438,7 +437,7 @@ def get_hits_summary_table(
438
437
  table_out: Optional[str] = None,
439
438
  resources: Optional[ResourceConfig] = None,
440
439
  ) -> pd.DataFrame:
441
- GET_HITS_SUMMARY_TABLE = """Annotate lead SNPs with nearest genes and write the locus summary table.
440
+ """Annotate lead SNPs with nearest genes and write the locus summary table.
442
441
 
443
442
  For each lead SNP in *leads_df*, runs two complementary annotation passes:
444
443
 
@@ -475,33 +474,21 @@ def get_hits_summary_table(
475
474
  Clumped locus summary table. Contains all columns from *leads_df*
476
475
  plus annotation fields from both passes, including:
477
476
 
478
- .. list-table::
479
- :widths: 30 70
480
- :header-rows: 1
481
-
482
- * - Column
483
- - Description
484
- * - ``genic``
485
- - ``True`` when the lead SNP overlaps a gene body
486
- * - ``nearest_upstream_gene``
487
- - Nearest upstream gene symbol (strand-aware)
488
- * - ``upstream_distance``
489
- - Distance to ``nearest_upstream_gene`` in bp
490
- * - ``nearest_downstream_gene``
491
- - Nearest downstream gene symbol (strand-aware)
492
- * - ``downstream_distance``
493
- - Distance to ``nearest_downstream_gene`` in bp
494
- * - ``promoter_upstream_flag``
495
- - ``True`` when the SNP is within 2 kb upstream of a TSS
496
- * - ``gene_density``
497
- - Number of genes within the search window
498
- * - ``top_gene``
499
- - Top-priority gene from the scoring pass
500
- * - ``biotype``
501
- - Ensembl biotype of ``top_gene`` (``'intergenic'`` when no
502
- genic overlap)
503
- * - ``priority_score``
504
- - Composite priority score (genic hits only)
477
+ - ``genic`` — ``True`` when the lead SNP overlaps a gene body.
478
+ - ``nearest_upstream_gene`` — nearest upstream gene symbol
479
+ (strand-aware).
480
+ - ``upstream_distance`` — distance to ``nearest_upstream_gene`` in bp.
481
+ - ``nearest_downstream_gene`` — nearest downstream gene symbol
482
+ (strand-aware).
483
+ - ``downstream_distance`` — distance to ``nearest_downstream_gene`` in
484
+ bp.
485
+ - ``promoter_upstream_flag`` — ``True`` when the SNP is within 2 kb
486
+ upstream of a TSS.
487
+ - ``gene_density`` — number of genes within the search window.
488
+ - ``top_gene`` — top-priority gene from the scoring pass.
489
+ - ``biotype`` — Ensembl biotype of ``top_gene`` (``'intergenic'`` when
490
+ no genic overlap).
491
+ - ``priority_score`` — composite priority score (genic hits only).
505
492
 
506
493
  Notes
507
494
  -----
@@ -578,7 +565,8 @@ def get_hits_summary_table(
578
565
  locus_table = leads_df
579
566
 
580
567
  if table_out is not None:
581
- locus_table.to_csv(table_out, index=False, sep="\t", na_rep="None")
582
- logger.info("Locus summary written to: %s", table_out)
568
+ outpath = table_out.replace(" ", "_").lower() + '.tsv'
569
+ locus_table.to_csv(outpath, index=False, sep="\t", na_rep="None")
570
+ logger.info("Locus summary written to: %s", outpath)
583
571
 
584
572
  return _clump_by_distance(locus_table, window_kb=window_kb)