pycmplot 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {pycmplot-0.2.0 → pycmplot-0.2.1}/LICENSE +1 -1
  2. {pycmplot-0.2.0/pycmplot.egg-info → pycmplot-0.2.1}/PKG-INFO +6 -3
  3. {pycmplot-0.2.0 → pycmplot-0.2.1}/README.md +5 -2
  4. {pycmplot-0.2.0 → pycmplot-0.2.1}/docs/conf.py +1 -1
  5. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/__init__.py +1 -1
  6. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/_core.py +42 -23
  7. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/annotation.py +48 -45
  8. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/cli.py +38 -16
  9. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/constants.py +2 -2
  10. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/io.py +115 -51
  11. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/liftover.py +8 -8
  12. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/plotting/circular.py +49 -40
  13. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/plotting/linear.py +247 -46
  14. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/resources.py +6 -6
  15. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/stats.py +6 -6
  16. {pycmplot-0.2.0 → pycmplot-0.2.1/pycmplot.egg-info}/PKG-INFO +6 -3
  17. {pycmplot-0.2.0 → pycmplot-0.2.1}/pyproject.toml +1 -1
  18. {pycmplot-0.2.0 → pycmplot-0.2.1}/setup.cfg +1 -1
  19. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
  20. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
  21. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot/data/hg19ToHg38.over.chain +0 -0
  22. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot.egg-info/SOURCES.txt +0 -0
  23. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot.egg-info/dependency_links.txt +0 -0
  24. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot.egg-info/entry_points.txt +0 -0
  25. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot.egg-info/requires.txt +0 -0
  26. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot.egg-info/top_level.txt +0 -0
  27. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docs/conf.py +0 -0
  28. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docstrings_annotation.py +0 -0
  29. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docstrings_core_cli.py +0 -0
  30. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docstrings_io.py +0 -0
  31. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docstrings_liftover.py +0 -0
  32. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docstrings_plotting.py +0 -0
  33. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docstrings_resources_constants.py +0 -0
  34. {pycmplot-0.2.0 → pycmplot-0.2.1}/pycmplot_docs/docstrings_stats.py +0 -0
  35. {pycmplot-0.2.0 → pycmplot-0.2.1}/setup.py +0 -0
@@ -1,4 +1,4 @@
1
- CC BY-NC-SA 4.0 License
1
+ CC-BY-NC-SA-4.0 License
2
2
 
3
3
  Copyright (c) 2026 Kevin Esoh
4
4
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
@@ -85,6 +85,8 @@ certain threshold, e.g. `0.01 (1e-2)` or `0.001 (1e-3)`.
85
85
  A potentially useful application is **comparative visualization** of results from multiple imputation panels,
86
86
  multiple populations, or multiple traits to observe shared genetic architecture.
87
87
 
88
+ Read more in the package documentation page: https://pycmplot.readthedocs.io/en/latest/
89
+
88
90
  ---
89
91
 
90
92
  ## Installation
@@ -178,7 +180,8 @@ pycmplot \
178
180
  |------|-------------|---------|
179
181
  | `-s, --sum_stats` | Comma-separated sumstats files | **required** |
180
182
  | `-l, --labels` | Comma-separated track labels | **required** |
181
- | `-b, --build_column` | Genome build column name (containing hg18/hg19/hg38) | **required** |
183
+ | `-b, --build` | Comma-separated genome builds of sumstats | off |
184
+ | `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
182
185
  | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
183
186
  | `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
184
187
  | `--logp` | Plot -log10(p) | off |
@@ -186,7 +189,7 @@ pycmplot \
186
189
  | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
187
190
  | `-sug, --suggest_threshold` | Threshold for suggestive signals | off |
188
191
  | `-hl, --highlight` | Highlight significant loci | off |
189
- | `-a, --annotate` | Annotate with `SNP` or `GENE` | `SNP` |
192
+ | `-a, --annotate` | Annotate with `snp`, `gene`, or any column in `hits_table` | off (`snp` if given without a value) |
190
193
  | `-tp, --trim_pval` | Trim variants above this p-value for speed | off |
191
194
  | `-st, --sort_track` | Sort tracks by `label` or `chrom_len` | input order |
192
195
  | `-od, --output_dir` | Output directory | `.` |
@@ -59,6 +59,8 @@ certain threshold, e.g. `0.01 (1e-2)` or `0.001 (1e-3)`.
59
59
  A potentially useful application is **comparative visualization** of results from multiple imputation panels,
60
60
  multiple populations, or multiple traits to observe shared genetic architecture.
61
61
 
62
+ Read more in the package documentation page: https://pycmplot.readthedocs.io/en/latest/
63
+
62
64
  ---
63
65
 
64
66
  ## Installation
@@ -152,7 +154,8 @@ pycmplot \
152
154
  |------|-------------|---------|
153
155
  | `-s, --sum_stats` | Comma-separated sumstats files | **required** |
154
156
  | `-l, --labels` | Comma-separated track labels | **required** |
155
- | `-b, --build_column` | Genome build column name (containing hg18/hg19/hg38) | **required** |
157
+ | `-b, --build` | Comma-separated genome builds of sumstats | off |
158
+ | `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
156
159
  | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
157
160
  | `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
158
161
  | `--logp` | Plot -log10(p) | off |
@@ -160,7 +163,7 @@ pycmplot \
160
163
  | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
161
164
  | `-sug, --suggest_threshold` | Threshold for suggestive signals | off |
162
165
  | `-hl, --highlight` | Highlight significant loci | off |
163
- | `-a, --annotate` | Annotate with `SNP` or `GENE` | `SNP` |
166
+ | `-a, --annotate` | Annotate with `snp`, `gene`, or any column in `hits_table` | off (`snp` if given without a value) |
164
167
  | `-tp, --trim_pval` | Trim variants above this p-value for speed | off |
165
168
  | `-st, --sort_track` | Sort tracks by `label` or `chrom_len` | input order |
166
169
  | `-od, --output_dir` | Output directory | `.` |
@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.abspath(".."))
12
12
  project = "pycmplot"
13
13
  copyright = "2026, Kevin Esoh"
14
14
  author = "Kevin Esoh"
15
- release = "0.2.0" # update to match your PyPI version
15
+ release = "0.2.1" # update to match PyPI version
16
16
 
17
17
  # -- General configuration -----------------------------------------------------
18
18
  extensions = [
@@ -42,4 +42,4 @@ __all__ = [
42
42
  "ResourceConfig",
43
43
  ]
44
44
 
45
- __version__ = "0.1.9"
45
+ __version__ = "0.2.1"
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- CORE_MODULE = '''"""
3
+ CORE_MODULE = """
4
4
  pycmplot._core
5
5
  ==============
6
6
 
@@ -12,7 +12,7 @@ work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`, and
12
12
  All imports are deferred inside :func:`main` so that
13
13
  ``import pycmplot`` remains fast regardless of the size of the dependency
14
14
  tree.
15
- """'''
15
+ """
16
16
 
17
17
  import logging
18
18
  import warnings
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
26
26
 
27
27
 
28
28
  def main() -> None:
29
- MAIN = '''"""Orchestrate the full pycmplot pipeline from the command line.
29
+ MAIN = """Orchestrate the full pycmplot pipeline from the command line.
30
30
 
31
31
  This function is registered as the ``pycmplot`` console-script entry point
32
32
  in ``pyproject.toml`` / ``setup.cfg``. It performs the following steps in
@@ -75,7 +75,7 @@ def main() -> None:
75
75
  Linear Manhattan plotter called for ``--mode lm`` (default).
76
76
  pycmplot.plotting.circular.plot_circular :
77
77
  Circular Manhattan plotter called for ``--mode cm``.
78
- """'''
78
+ """
79
79
 
80
80
  # ------------------------------------------------------------------
81
81
  # Deferred imports so ``import pycmplot`` remains fast
@@ -105,7 +105,8 @@ def main() -> None:
105
105
  chrom_arg = args.chrom_column
106
106
  pos_arg = args.pos_column
107
107
  snp_arg = args.snp_column
108
- build_arg = args.build_column
108
+ build_arg = args.build
109
+ buildc_arg = args.build_column
109
110
  labels_raw = args.labels
110
111
  pcol_arg = args.pval_column
111
112
  logp = args.logp
@@ -123,13 +124,13 @@ def main() -> None:
123
124
  point_size = args.point_size
124
125
  highlight = args.highlight
125
126
  highlight_thresh = args.highlight_thresh
126
- highight_color = args.highight_color
127
+ highlight_color = args.highlight_color
127
128
  highlight_line = args.highlight_line
128
- highight_line_color = args.highight_line_color
129
+ highlight_line_color = args.highlight_line_color
129
130
  colors_raw = args.colors
130
- r_min = args.r_min
131
- r_max = args.r_max
132
- pad = args.pad
131
+ r_min = args.min_radius
132
+ r_max = args.max_radius
133
+ pad = args.circular_track_spacing
133
134
  output_format = args.output_format
134
135
  output_dir = args.output_dir
135
136
  dpi = args.dpi
@@ -142,18 +143,20 @@ def main() -> None:
142
143
 
143
144
 
144
145
  # ------------------------------------------------------------------
145
- # Sumstat, labels, colours, track heights str to list
146
+ # Sumstat, labels, colours, track heights [build] str to list
146
147
  # ------------------------------------------------------------------
147
148
  (
148
149
  sum_stats,
149
150
  labels,
150
151
  colors,
151
- t_heights
152
+ t_heights,
153
+ builds
152
154
  ) = strip_comma_separated_input_streams(
153
155
  sum_stats = sum_stats_raw,
154
156
  labels = labels_raw,
155
157
  colors_raw = colors_raw,
156
158
  track_heights = track_heights,
159
+ builds = build_arg if build_arg else None,
157
160
  )
158
161
 
159
162
  # ------------------------------------------------------------------
@@ -182,7 +185,8 @@ def main() -> None:
182
185
  pos = pos_arg,
183
186
  snp = snp_arg,
184
187
  pcol = pcol_arg,
185
- build = build_arg
188
+ buildc = buildc_arg,
189
+ build = builds
186
190
  )
187
191
 
188
192
  # ------------------------------------------------------------------
@@ -212,6 +216,19 @@ def main() -> None:
212
216
  resources=resources,
213
217
  )
214
218
 
219
+ # ------------------------------------------------------------------
220
+ # ANNOTATE BY
221
+ # ------------------------------------------------------------------
222
+ if annotate:
223
+ if str(annotate).upper() == "GENE":
224
+ label_col = 'top_gene'
225
+ elif str(annotate).upper() == "SNP":
226
+ label_col = 'SNP'
227
+ else:
228
+ label_col = annotate
229
+
230
+ logger.info(f"Anotate by: {label_col}")
231
+
215
232
  # ------------------------------------------------------------------
216
233
  # CIRCULAR MANHATTAN
217
234
  # ------------------------------------------------------------------
@@ -224,15 +241,16 @@ def main() -> None:
224
241
  signif_lines = signif_lines,
225
242
  highlight = highlight,
226
243
  highlight_thresh = highlight_thresh,
227
- highight_color = highight_color,
244
+ highlight_color = highlight_color,
228
245
  highlight_line = highlight_line,
229
- highight_line_color = highight_line_color,
246
+ highlight_line_color = highlight_line_color,
230
247
  colors = colors,
231
248
  chrom_label_side = chrom_label_side,
232
249
  chrom_label_size = chrom_label_size,
233
250
  track_label_size = track_label_size,
234
251
  track_label_orientation = track_label_orientation,
235
252
  annotate = annotate,
253
+ label_col = label_col if annotate else None,
236
254
  annotation_size = annotation_size,
237
255
  hits_table = hits_table,
238
256
  sector_sizes = merged_assoc_sector_sizes,
@@ -253,24 +271,25 @@ def main() -> None:
253
271
  else:
254
272
  logger.info("Generating LINEAR MANHATTAN Plot ...")
255
273
  plot_linear(
256
- sumstats_loaded = sumstats_loaded,
257
- track_heights = t_heights,
274
+ sumstats_loaded=sumstats_loaded,
275
+ track_heights=t_heights,
258
276
  trim_pval=trim_pval,
259
277
  logp=True if logp else False,
260
278
  point_size=point_size,
261
279
  highlight=highlight,
262
280
  highlight_thresh=highlight_thresh,
263
- highight_color = highight_color,
264
- highlight_line = highlight_line,
265
- highight_line_color = highight_line_color,
266
- annot_df=hits_table if not hits_table.empty else None,
267
- label_col="top_gene",
281
+ highlight_color=highlight_color,
282
+ highlight_line=highlight_line,
283
+ highlight_line_color=highlight_line_color,
284
+ annotate=annotate,
285
+ hits_table=hits_table if not hits_table.empty else None,
286
+ label_col=label_col if annotate else None,
268
287
  chr_spacing=chr_spacing,
269
288
  linear_track_spacing=linear_track_spacing,
270
289
  colors=colors,
271
290
  signif_lines=signif_lines,
272
291
  plot_title=plot_title,
273
- no_track_labels = no_track_labels,
292
+ no_track_labels=no_track_labels,
274
293
  dpi=dpi,
275
294
  output_format=output_format,
276
295
  output_dir=output_dir,
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- MODULE_DOCSTRING = '''"""
3
+ MODULE_DOCSTRING = """
4
4
  pycmplot.annotation
5
5
  ====================
6
6
 
@@ -20,7 +20,7 @@ Annotation relies on a bundled Ensembl gene-info TSV (hg38 or hg19). The
20
20
  file is resolved through :class:`~pycmplot.resources.ResourceConfig`; custom
21
21
  paths can be supplied via the ``PYCMPLOT_GENEINFO_HG38`` /
22
22
  ``PYCMPLOT_GENEINFO_HG19`` environment variables.
23
- """'''
23
+ """
24
24
 
25
25
  import bisect
26
26
  import logging
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
41
41
  # ---------------------------------------------------------------------------
42
42
 
43
43
  def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
44
- BUILD_GENES_DICT = '''"""Build a chromosome-keyed interval dictionary with sorted start positions.
44
+ BUILD_GENES_DICT = """Build a chromosome-keyed interval dictionary with sorted start positions.
45
45
 
46
46
  Pre-processes the gene reference DataFrame into a structure that supports
47
47
  efficient O(log N) binary-search lookup of genes near a query position.
@@ -67,7 +67,7 @@ def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
67
67
  -----
68
68
  This function is called once per :func:`get_hits_summary_table` invocation;
69
69
  the result is passed to :func:`_annotate_variant` for each lead SNP.
70
- """'''
70
+ """
71
71
 
72
72
  genes_df = genes_df.sort_values(["CHR", "START"])
73
73
  genes_dict: dict = {}
@@ -98,7 +98,7 @@ def _annotate_variant(
98
98
  window: int = 500_000,
99
99
  promoter_window: int = 2_000,
100
100
  ) -> dict:
101
- ANNOTATE_VARIANT = '''"""Return strand-aware nearest-gene annotation for a single variant.
101
+ ANNOTATE_VARIANT = """Return strand-aware nearest-gene annotation for a single variant.
102
102
 
103
103
  Searches the pre-built *genes_dict* within *window* bp of *pos* on
104
104
  *chrom*. Reports the nearest upstream and downstream genes (relative to
@@ -138,7 +138,7 @@ def _annotate_variant(
138
138
  within *promoter_window* bp upstream of any TSS.
139
139
  * ``gene_density`` (int) – number of genes with any overlap in the
140
140
  search window.
141
- """'''
141
+ """
142
142
 
143
143
  _empty = {
144
144
  "genic": False,
@@ -238,7 +238,7 @@ def _annotate_and_prioritize_variant(
238
238
  promoter_window: int = 2_000,
239
239
  biotype_weights: Optional[dict] = None,
240
240
  ) -> Optional[dict]:
241
- ANNOTATE_PRIORITIZE = '''"""Score and rank candidate genes for a single variant using a composite
241
+ ANNOTATE_PRIORITIZE = """Score and rank candidate genes for a single variant using a composite
242
242
  priority metric.
243
243
 
244
244
  Builds a candidate gene set within *window* bp of *pos* on *chrom*, then
@@ -287,7 +287,7 @@ def _annotate_and_prioritize_variant(
287
287
  For intergenic variants, ``top_gene`` contains the two nearest flanking
288
288
  gene symbols joined by ``'-'`` (e.g. ``'HBB-HBD'``) and ``biotype``
289
289
  is set to ``'intergenic'``.
290
- """'''
290
+ """
291
291
 
292
292
  if biotype_weights is None:
293
293
  biotype_weights = BIOTYPE_WEIGHTS
@@ -386,7 +386,7 @@ def _annotate_and_prioritize_variant(
386
386
  # ---------------------------------------------------------------------------
387
387
 
388
388
  def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
389
- CLUMP_BY_DISTANCE = '''"""Reduce a lead-SNP table to one representative SNP per locus.
389
+ CLUMP_BY_DISTANCE = """Reduce a lead-SNP table to one representative SNP per locus.
390
390
 
391
391
  Applies greedy distance-based clumping within each chromosome group,
392
392
  starting from the most significant SNP (lowest ``P`` or highest ``logP``).
@@ -406,7 +406,7 @@ def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
406
406
  pandas.DataFrame
407
407
  Deduplicated locus representatives sorted by chromosome and position
408
408
  (natural sort order).
409
- """'''
409
+ """
410
410
 
411
411
  window = window_kb * 1000
412
412
  clumped: list[pd.Series] = []
@@ -438,7 +438,7 @@ def get_hits_summary_table(
438
438
  table_out: Optional[str] = None,
439
439
  resources: Optional[ResourceConfig] = None,
440
440
  ) -> pd.DataFrame:
441
- GET_HITS_SUMMARY_TABLE = '''"""Annotate lead SNPs with nearest genes and write the locus summary table.
441
+ GET_HITS_SUMMARY_TABLE = """Annotate lead SNPs with nearest genes and write the locus summary table.
442
442
 
443
443
  For each lead SNP in *leads_df*, runs two complementary annotation passes:
444
444
 
@@ -528,51 +528,54 @@ def get_hits_summary_table(
528
528
  SNP CHR POS top_gene biotype
529
529
  0 rs123456 2 60718043 BCL11A protein_coding
530
530
  1 rs789012 11 5246696 HBB protein_coding
531
- """'''
531
+ """
532
532
 
533
533
  if resources is None:
534
534
  resources = default_resources
535
535
 
536
536
  # Choose gene info file based on build
537
- if "OLD_POS" not in leads_df.columns and list(set(leads_df["BUILD"])) == ["hg19"]:
538
- geneinfo_path = resources.require("geneinfo_hg19")
539
- else:
540
- geneinfo_path = resources.require("geneinfo_hg38")
537
+ if 'BUILD' in leads_df.columns:
538
+ if "OLD_POS" not in leads_df.columns and list(set(leads_df["BUILD"])) == ["hg19"]:
539
+ geneinfo_path = resources.require("geneinfo_hg19")
540
+ else:
541
+ geneinfo_path = resources.require("geneinfo_hg38")
541
542
 
542
- logger.info("Loading gene info from: %s", geneinfo_path)
543
- geneinfo = pd.read_csv(geneinfo_path, header=0, sep="\t")
544
- genes_dict = _build_genes_dict(geneinfo)
543
+ logger.info("Loading gene info from: %s", geneinfo_path)
544
+ geneinfo = pd.read_csv(geneinfo_path, header=0, sep="\t")
545
+ genes_dict = _build_genes_dict(geneinfo)
545
546
 
546
- window = window_kb * 1_000
547
- records: list[dict] = []
547
+ window = window_kb * 1_000
548
+ records: list[dict] = []
548
549
 
549
550
 
550
- logger.info("Annotating lead variants and generating hits summary table ...")
551
- for _, row in leads_df.iterrows():
552
- annotation = _annotate_variant(
553
- chrom=row["CHR"],
554
- pos=row["POS"],
555
- genes_dict=genes_dict,
556
- window=window,
557
- )
558
- prioritized = _annotate_and_prioritize_variant(
559
- chrom=row["CHR"],
560
- pos=row["POS"],
561
- genes_df=geneinfo,
562
- lead_snps_df=leads_df,
563
- window=window,
564
- )
551
+ logger.info("Annotating lead variants and generating hits summary table ...")
552
+ for _, row in leads_df.iterrows():
553
+ annotation = _annotate_variant(
554
+ chrom=row["CHR"],
555
+ pos=row["POS"],
556
+ genes_dict=genes_dict,
557
+ window=window,
558
+ )
559
+ prioritized = _annotate_and_prioritize_variant(
560
+ chrom=row["CHR"],
561
+ pos=row["POS"],
562
+ genes_df=geneinfo,
563
+ lead_snps_df=leads_df,
564
+ window=window,
565
+ )
565
566
 
566
- record = {
567
- **(row.to_dict()),
568
- **(annotation if annotation is not None else {}),
569
- **(prioritized if prioritized is not None else {}),
570
- }
571
- records.append(record)
567
+ record = {
568
+ **(row.to_dict()),
569
+ **(annotation if annotation is not None else {}),
570
+ **(prioritized if prioritized is not None else {}),
571
+ }
572
+ records.append(record)
572
573
 
573
- locus_table = pd.DataFrame(records).sort_values(
574
- ["CHR", "POS"], key=natsort.natsort_keygen()
575
- )
574
+ locus_table = pd.DataFrame(records).sort_values(
575
+ ["CHR", "POS"], key=natsort.natsort_keygen()
576
+ )
577
+ else:
578
+ locus_table = leads_df
576
579
 
577
580
  if table_out is not None:
578
581
  locus_table.to_csv(table_out, index=False, sep="\t", na_rep="None")
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- CLI_MODULE = '''"""
3
+ CLI_MODULE = """
4
4
  pycmplot.cli
5
5
  ============
6
6
 
@@ -15,7 +15,7 @@ Arguments are organised into four groups:
15
15
  colours, and output format (apply to both plot modes).
16
16
  * **Circular Only** — arguments specific to ``--mode cm``.
17
17
  * **Linear Only** — arguments specific to ``--mode lm`` (default).
18
- """'''
18
+ """
19
19
 
20
20
  import argparse
21
21
  from pathlib import Path
@@ -30,7 +30,7 @@ DESCMSG = """
30
30
 
31
31
 
32
32
  def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
33
- GET_ARGUMENTS = '''"""Parse and return command-line arguments for the pycmplot entry point.
33
+ GET_ARGUMENTS = """Parse and return command-line arguments for the pycmplot entry point.
34
34
 
35
35
  Parameters
36
36
  ----------
@@ -146,8 +146,10 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
146
146
  - Description
147
147
  * - ``annotate``
148
148
  - str
149
- - Annotation content: ``'SNP'`` (rsID) or ``'GENE'`` (nearest
150
- gene symbol). Default ``'SNP'``.
149
+ - Annotation content: annotate loci by a column in the hits table:
150
+ ``'snp'`` (rsID), ``top_gene``, ``nearest_upstream_gene``, ``nearest_downstream_gene``, etc.,
151
+ or ``'gene'`` (let the package decide one of ``top_gene`` or ``nearest_upstream_gene``).
152
+ Default ``None`` (flag omitted); ``'SNP'`` when the flag is given without a value.
151
153
  * - ``annotation_size``
152
154
  - float
153
155
  - Font size for annotation labels. Default ``6``.
@@ -263,7 +265,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
263
265
  --------
264
266
  pycmplot._core.main :
265
267
  Consumes the :class:`~argparse.Namespace` returned by this function.
266
- """'''
268
+ """
267
269
 
268
270
  parser = argparse.ArgumentParser(
269
271
  prog="pycmplot",
@@ -293,10 +295,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
293
295
  ),
294
296
  required=True, type=str, metavar="str",
295
297
  )
296
- req.add_argument(
297
- "-b", "--build_column", required=True, type=str, metavar="str",
298
- help="Genome build column name (containing hg18/hg19/hg38)."
299
- )
298
+
300
299
 
301
300
  # ------------------------------------------------------------------
302
301
  # Optional
@@ -328,6 +327,23 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
328
327
  type=str, metavar="str",
329
328
  help="File delimiter (autodetected if omitted)."
330
329
  )
330
+ opt.add_argument(
331
+ "-bc", "--build_column", required=False, type=str, metavar="str",
332
+ help="Name of column containing genome build (hg18/hg19/hg38). Or use ``--build`` below to supply genome builds per summary stat file."
333
+ )
334
+ opt.add_argument(
335
+ "-b","--build",
336
+ help="""
337
+ Comma-sperated list of genome build of summary stats file(s) listed in the same order as sumstats files.
338
+ (e.g. hg19,hg38,hg38,hg19 means:
339
+ file1.txt.gz --> hg19
340
+ file2.txt.gz --> hg38
341
+ file3.tsv --> hg38 ... etc)
342
+ """,
343
+ required=False,
344
+ type=str,
345
+ metavar='str'
346
+ )
331
347
  opt.add_argument(
332
348
  "--logp", action="store_true",
333
349
  help="Plot −log₁₀(p) instead of raw p-values."
@@ -355,11 +371,17 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
355
371
  default=None, const=1e-5, nargs="?", type=float, metavar="float",
356
372
  help="Suggestive significance threshold (default: 1e-5)."
357
373
  )
374
+
375
+ # CLASS TO HANDLE ANNOTATION VALUES NOT IN CHOICE LIST
376
+ class AllowAll(list):
377
+ def __contains__(self, item):
378
+ return True
379
+
358
380
  opt.add_argument(
359
381
  "-a", "--annotate",
360
- choices=["SNP", "GENE"], nargs="?",
361
- default="SNP", const="SNP", type=str, #metavar="str",
362
- help="Annotate significant loci by SNP ID or nearest gene."
382
+ choices=AllowAll(["snp", "gene", "top_gene", "nearest_upstream_gene", "nearest_downstream_gene"]), nargs="?",
383
+ default=None, type=str, metavar="{snp,gene,top_gene,nearest_upstream_gene,nearest_downstream_gene,...}", const="SNP",
384
+ help="Annotate loci by column name in hits table (defaults to 'snp' if provided and no value set)."
363
385
  )
364
386
  opt.add_argument(
365
387
  "-p_size", "--point_size", default=6, type=float, metavar="float",
@@ -378,7 +400,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
378
400
  help="P-value threshold for highlighting (default: 5e-8)."
379
401
  )
380
402
  opt.add_argument(
381
- "-hc", "--highight_color", default="brown", type=str, metavar="str",
403
+ "-hc", "--highlight_color", default="brown", type=str, metavar="str",
382
404
  help="Color of highlighted positions (default: brown)."
383
405
  )
384
406
  opt.add_argument(
@@ -386,7 +408,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
386
408
  help="Draw vertical dashed lines through highlighted positions."
387
409
  )
388
410
  opt.add_argument(
389
- "-hlc", "--highight_line_color", default="grey", type=str, metavar="str",
411
+ "-hlc", "--highlight_line_color", default="grey", type=str, metavar="str",
390
412
  help="Color of highlight line (default: grey)."
391
413
  )
392
414
  opt.add_argument(
@@ -444,7 +466,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
444
466
  )
445
467
  cio.add_argument(
446
468
  "-cl_side", "--chrom_label_side", choices=["inside", "outside"],
447
- nargs="?", default="inside", const="inside", type=str,
469
+ nargs="?", default=None, const="inside", type=str,
448
470
  help="Chromosome label placement (default: inside)."
449
471
  )
450
472
  cio.add_argument(
@@ -1,4 +1,4 @@
1
- CONSTANTS_MODULE = '''"""
1
+ CONSTANTS_MODULE = """
2
2
  pycmplot.constants
3
3
  ==================
4
4
 
@@ -27,7 +27,7 @@ Notes
27
27
  ``hg38_chr_lengths`` reflects the GRCh38 primary assembly (GCA_000001405).
28
28
  Values may differ slightly from builds that include alternate contigs or
29
29
  patches.
30
- """'''
30
+ """
31
31
 
32
32
  # ---------------------------------------------------------------------------
33
33
  # hg38 chromosome lengths (GRCh38)