pycmplot 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycmplot/__init__.py CHANGED
@@ -42,4 +42,4 @@ __all__ = [
42
42
  "ResourceConfig",
43
43
  ]
44
44
 
45
- __version__ = "0.1.9"
45
+ __version__ = "0.2.1"
pycmplot/_core.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- CORE_MODULE = '''"""
3
+ CORE_MODULE = """
4
4
  pycmplot._core
5
5
  ==============
6
6
 
@@ -12,7 +12,7 @@ work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`, and
12
12
  All imports are deferred inside :func:`main` so that
13
13
  ``import pycmplot`` remains fast regardless of the size of the dependency
14
14
  tree.
15
- """'''
15
+ """
16
16
 
17
17
  import logging
18
18
  import warnings
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
26
26
 
27
27
 
28
28
  def main() -> None:
29
- MAIN = '''"""Orchestrate the full pycmplot pipeline from the command line.
29
+ MAIN = """Orchestrate the full pycmplot pipeline from the command line.
30
30
 
31
31
  This function is registered as the ``pycmplot`` console-script entry point
32
32
  in ``pyproject.toml`` / ``setup.cfg``. It performs the following steps in
@@ -75,7 +75,7 @@ def main() -> None:
75
75
  Linear Manhattan plotter called for ``--mode lm`` (default).
76
76
  pycmplot.plotting.circular.plot_circular :
77
77
  Circular Manhattan plotter called for ``--mode cm``.
78
- """'''
78
+ """
79
79
 
80
80
  # ------------------------------------------------------------------
81
81
  # Deferred imports so ``import pycmplot`` remains fast
@@ -92,6 +92,7 @@ def main() -> None:
92
92
  )
93
93
  from pycmplot.plotting.linear import plot_linear
94
94
  from pycmplot.plotting.circular import plot_circular
95
+ from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
95
96
  from pycmplot.resources import ResourceConfig
96
97
 
97
98
  # ------------------------------------------------------------------
@@ -105,10 +106,18 @@ def main() -> None:
105
106
  chrom_arg = args.chrom_column
106
107
  pos_arg = args.pos_column
107
108
  snp_arg = args.snp_column
108
- build_arg = args.build_column
109
+ build_arg = args.build
110
+ buildc_arg = args.build_column
109
111
  labels_raw = args.labels
110
112
  pcol_arg = args.pval_column
111
113
  logp = args.logp
114
+ qq = args.qq_plot
115
+ qq_separate = args.qq_separate
116
+ qq_ncols = args.qq_ncols
117
+ qq_thin = args.qq_thin
118
+ thin_below = args.thin_below
119
+ qq_max_points = args.qq_max_points
120
+ qq_overlay = args.qq_overlay
112
121
  chrom_label_size = args.chrom_label_size
113
122
  chrom_label_side = args.chrom_label_side
114
123
  track_label_size = args.track_label_size
@@ -123,13 +132,13 @@ def main() -> None:
123
132
  point_size = args.point_size
124
133
  highlight = args.highlight
125
134
  highlight_thresh = args.highlight_thresh
126
- highight_color = args.highight_color
135
+ highlight_color = args.highlight_color
127
136
  highlight_line = args.highlight_line
128
- highight_line_color = args.highight_line_color
137
+ highlight_line_color = args.highlight_line_color
129
138
  colors_raw = args.colors
130
- r_min = args.r_min
131
- r_max = args.r_max
132
- pad = args.pad
139
+ r_min = args.min_radius
140
+ r_max = args.max_radius
141
+ pad = args.circular_track_spacing
133
142
  output_format = args.output_format
134
143
  output_dir = args.output_dir
135
144
  dpi = args.dpi
@@ -142,18 +151,20 @@ def main() -> None:
142
151
 
143
152
 
144
153
  # ------------------------------------------------------------------
145
- # Sumstat, labels, colours, track heights str to list
154
+ # Sumstat, labels, colours, track heights [build] str to list
146
155
  # ------------------------------------------------------------------
147
156
  (
148
157
  sum_stats,
149
158
  labels,
150
159
  colors,
151
- t_heights
160
+ t_heights,
161
+ builds
152
162
  ) = strip_comma_separated_input_streams(
153
163
  sum_stats = sum_stats_raw,
154
164
  labels = labels_raw,
155
165
  colors_raw = colors_raw,
156
166
  track_heights = track_heights,
167
+ builds = build_arg if build_arg else None,
157
168
  )
158
169
 
159
170
  # ------------------------------------------------------------------
@@ -161,7 +172,8 @@ def main() -> None:
161
172
  # ------------------------------------------------------------------
162
173
  (
163
174
  plt_name,
164
- table_out
175
+ table_out,
176
+ plt_base,
165
177
  ) = get_output_paths(
166
178
  labels,
167
179
  mode = mode,
@@ -182,7 +194,8 @@ def main() -> None:
182
194
  pos = pos_arg,
183
195
  snp = snp_arg,
184
196
  pcol = pcol_arg,
185
- build = build_arg
197
+ buildc = buildc_arg,
198
+ build = builds
186
199
  )
187
200
 
188
201
  # ------------------------------------------------------------------
@@ -198,6 +211,7 @@ def main() -> None:
198
211
  sumstats_loaded,
199
212
  hits_table,
200
213
  signif_lines,
214
+ pval_dict,
201
215
  ) = get_sumstats_and_merged_sector_list(
202
216
  sum_stats=sum_stats,
203
217
  labels=labels,
@@ -212,6 +226,19 @@ def main() -> None:
212
226
  resources=resources,
213
227
  )
214
228
 
229
+ # ------------------------------------------------------------------
230
+ # ANNOTATE BY
231
+ # ------------------------------------------------------------------
232
+ if annotate:
233
+ if str(annotate).upper() == "GENE":
234
+ label_col = 'top_gene'
235
+ elif str(annotate).upper() == "SNP":
236
+ label_col = 'SNP'
237
+ else:
238
+ label_col = annotate
239
+
240
+ logger.info(f"Anotate by: {label_col}")
241
+
215
242
  # ------------------------------------------------------------------
216
243
  # CIRCULAR MANHATTAN
217
244
  # ------------------------------------------------------------------
@@ -224,15 +251,16 @@ def main() -> None:
224
251
  signif_lines = signif_lines,
225
252
  highlight = highlight,
226
253
  highlight_thresh = highlight_thresh,
227
- highight_color = highight_color,
254
+ highlight_color = highlight_color,
228
255
  highlight_line = highlight_line,
229
- highight_line_color = highight_line_color,
256
+ highlight_line_color = highlight_line_color,
230
257
  colors = colors,
231
258
  chrom_label_side = chrom_label_side,
232
259
  chrom_label_size = chrom_label_size,
233
260
  track_label_size = track_label_size,
234
261
  track_label_orientation = track_label_orientation,
235
262
  annotate = annotate,
263
+ label_col = label_col if annotate else None,
236
264
  annotation_size = annotation_size,
237
265
  hits_table = hits_table,
238
266
  sector_sizes = merged_assoc_sector_sizes,
@@ -253,30 +281,77 @@ def main() -> None:
253
281
  else:
254
282
  logger.info("Generating LINEAR MANHATTAN Plot ...")
255
283
  plot_linear(
256
- sumstats_loaded = sumstats_loaded,
257
- track_heights = t_heights,
284
+ sumstats_loaded=sumstats_loaded,
285
+ track_heights=t_heights,
258
286
  trim_pval=trim_pval,
259
287
  logp=True if logp else False,
260
288
  point_size=point_size,
261
289
  highlight=highlight,
262
290
  highlight_thresh=highlight_thresh,
263
- highight_color = highight_color,
264
- highlight_line = highlight_line,
265
- highight_line_color = highight_line_color,
266
- annot_df=hits_table if not hits_table.empty else None,
267
- label_col="top_gene",
291
+ highlight_color=highlight_color,
292
+ highlight_line=highlight_line,
293
+ highlight_line_color=highlight_line_color,
294
+ annotate=annotate,
295
+ hits_table=hits_table if not hits_table.empty else None,
296
+ label_col=label_col if annotate else None,
268
297
  chr_spacing=chr_spacing,
269
298
  linear_track_spacing=linear_track_spacing,
270
299
  colors=colors,
271
300
  signif_lines=signif_lines,
272
301
  plot_title=plot_title,
273
- no_track_labels = no_track_labels,
302
+ no_track_labels=no_track_labels,
274
303
  dpi=dpi,
275
304
  output_format=output_format,
276
305
  output_dir=output_dir,
277
306
  figsize=(15, 9)
278
307
  )
279
308
 
309
+ # ------------------------------------------------------------------
310
+ # QQ PLOT
311
+ # ------------------------------------------------------------------
312
+ if qq and sumstats_loaded:
313
+ logger.info("Generating QQ Plot(s) ...")
314
+ qq_stem = f"{plt_base}_qq"
315
+
316
+ if qq_separate:
317
+ plot_qq_separate(
318
+ pval_dict=pval_dict,
319
+ thin=qq_thin,
320
+ thin_below=thin_below,
321
+ max_points=qq_max_points,
322
+ output_path=qq_stem,
323
+ colors=colors,
324
+ signif_threshold=signif_threshold or 5e-8,
325
+ dpi=dpi,
326
+ fig_format=output_format,
327
+ )
328
+ elif qq_overlay:
329
+ plot_qq_overlay(
330
+ pval_dict=pval_dict,
331
+ thin=qq_thin,
332
+ thin_below=thin_below,
333
+ max_points=qq_max_points,
334
+ colors=colors,
335
+ signif_threshold=signif_threshold or 5e-8,
336
+ dpi=dpi,
337
+ title=plot_title,
338
+ output_path=f"{qq_stem}_overlay",
339
+ fig_format=output_format,
340
+ )
341
+ else:
342
+ plot_qq_combined(
343
+ pval_dict=pval_dict,
344
+ thin=qq_thin,
345
+ thin_below=thin_below,
346
+ max_points=qq_max_points,
347
+ colors=colors,
348
+ ncols=qq_ncols,
349
+ signif_threshold=signif_threshold or 5e-8,
350
+ dpi=dpi,
351
+ title=plot_title,
352
+ output_path=f"{qq_stem}_combined",
353
+ fig_format=output_format,
354
+ )
280
355
 
281
356
  if __name__ == "__main__":
282
357
  main()
pycmplot/annotation.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- MODULE_DOCSTRING = '''"""
3
+ MODULE_DOCSTRING = """
4
4
  pycmplot.annotation
5
5
  ====================
6
6
 
@@ -20,7 +20,7 @@ Annotation relies on a bundled Ensembl gene-info TSV (hg38 or hg19). The
20
20
  file is resolved through :class:`~pycmplot.resources.ResourceConfig`; custom
21
21
  paths can be supplied via the ``PYCMPLOT_GENEINFO_HG38`` /
22
22
  ``PYCMPLOT_GENEINFO_HG19`` environment variables.
23
- """'''
23
+ """
24
24
 
25
25
  import bisect
26
26
  import logging
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
41
41
  # ---------------------------------------------------------------------------
42
42
 
43
43
  def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
44
- BUILD_GENES_DICT = '''"""Build a chromosome-keyed interval dictionary with sorted start positions.
44
+ BUILD_GENES_DICT = """Build a chromosome-keyed interval dictionary with sorted start positions.
45
45
 
46
46
  Pre-processes the gene reference DataFrame into a structure that supports
47
47
  efficient O(log N) binary-search lookup of genes near a query position.
@@ -67,7 +67,7 @@ def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
67
67
  -----
68
68
  This function is called once per :func:`get_hits_summary_table` invocation;
69
69
  the result is passed to :func:`_annotate_variant` for each lead SNP.
70
- """'''
70
+ """
71
71
 
72
72
  genes_df = genes_df.sort_values(["CHR", "START"])
73
73
  genes_dict: dict = {}
@@ -98,7 +98,7 @@ def _annotate_variant(
98
98
  window: int = 500_000,
99
99
  promoter_window: int = 2_000,
100
100
  ) -> dict:
101
- ANNOTATE_VARIANT = '''"""Return strand-aware nearest-gene annotation for a single variant.
101
+ ANNOTATE_VARIANT = """Return strand-aware nearest-gene annotation for a single variant.
102
102
 
103
103
  Searches the pre-built *genes_dict* within *window* bp of *pos* on
104
104
  *chrom*. Reports the nearest upstream and downstream genes (relative to
@@ -138,7 +138,7 @@ def _annotate_variant(
138
138
  within *promoter_window* bp upstream of any TSS.
139
139
  * ``gene_density`` (int) – number of genes with any overlap in the
140
140
  search window.
141
- """'''
141
+ """
142
142
 
143
143
  _empty = {
144
144
  "genic": False,
@@ -238,7 +238,7 @@ def _annotate_and_prioritize_variant(
238
238
  promoter_window: int = 2_000,
239
239
  biotype_weights: Optional[dict] = None,
240
240
  ) -> Optional[dict]:
241
- ANNOTATE_PRIORITIZE = '''"""Score and rank candidate genes for a single variant using a composite
241
+ ANNOTATE_PRIORITIZE = """Score and rank candidate genes for a single variant using a composite
242
242
  priority metric.
243
243
 
244
244
  Builds a candidate gene set within *window* bp of *pos* on *chrom*, then
@@ -287,7 +287,7 @@ def _annotate_and_prioritize_variant(
287
287
  For intergenic variants, ``top_gene`` contains the two nearest flanking
288
288
  gene symbols joined by ``'-'`` (e.g. ``'HBB-HBD'``) and ``biotype``
289
289
  is set to ``'intergenic'``.
290
- """'''
290
+ """
291
291
 
292
292
  if biotype_weights is None:
293
293
  biotype_weights = BIOTYPE_WEIGHTS
@@ -386,7 +386,7 @@ def _annotate_and_prioritize_variant(
386
386
  # ---------------------------------------------------------------------------
387
387
 
388
388
  def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
389
- CLUMP_BY_DISTANCE = '''"""Reduce a lead-SNP table to one representative SNP per locus.
389
+ CLUMP_BY_DISTANCE = """Reduce a lead-SNP table to one representative SNP per locus.
390
390
 
391
391
  Applies greedy distance-based clumping within each chromosome group,
392
392
  starting from the most significant SNP (lowest ``P`` or highest ``logP``).
@@ -406,7 +406,7 @@ def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
406
406
  pandas.DataFrame
407
407
  Deduplicated locus representatives sorted by chromosome and position
408
408
  (natural sort order).
409
- """'''
409
+ """
410
410
 
411
411
  window = window_kb * 1000
412
412
  clumped: list[pd.Series] = []
@@ -438,7 +438,7 @@ def get_hits_summary_table(
438
438
  table_out: Optional[str] = None,
439
439
  resources: Optional[ResourceConfig] = None,
440
440
  ) -> pd.DataFrame:
441
- GET_HITS_SUMMARY_TABLE = '''"""Annotate lead SNPs with nearest genes and write the locus summary table.
441
+ GET_HITS_SUMMARY_TABLE = """Annotate lead SNPs with nearest genes and write the locus summary table.
442
442
 
443
443
  For each lead SNP in *leads_df*, runs two complementary annotation passes:
444
444
 
@@ -528,51 +528,54 @@ def get_hits_summary_table(
528
528
  SNP CHR POS top_gene biotype
529
529
  0 rs123456 2 60718043 BCL11A protein_coding
530
530
  1 rs789012 11 5246696 HBB protein_coding
531
- """'''
531
+ """
532
532
 
533
533
  if resources is None:
534
534
  resources = default_resources
535
535
 
536
536
  # Choose gene info file based on build
537
- if "OLD_POS" not in leads_df.columns and list(set(leads_df["BUILD"])) == ["hg19"]:
538
- geneinfo_path = resources.require("geneinfo_hg19")
539
- else:
540
- geneinfo_path = resources.require("geneinfo_hg38")
537
+ if 'BUILD' in leads_df.columns:
538
+ if "OLD_POS" not in leads_df.columns and list(set(leads_df["BUILD"])) == ["hg19"]:
539
+ geneinfo_path = resources.require("geneinfo_hg19")
540
+ else:
541
+ geneinfo_path = resources.require("geneinfo_hg38")
541
542
 
542
- logger.info("Loading gene info from: %s", geneinfo_path)
543
- geneinfo = pd.read_csv(geneinfo_path, header=0, sep="\t")
544
- genes_dict = _build_genes_dict(geneinfo)
543
+ logger.info("Loading gene info from: %s", geneinfo_path)
544
+ geneinfo = pd.read_csv(geneinfo_path, header=0, sep="\t")
545
+ genes_dict = _build_genes_dict(geneinfo)
545
546
 
546
- window = window_kb * 1_000
547
- records: list[dict] = []
547
+ window = window_kb * 1_000
548
+ records: list[dict] = []
548
549
 
549
550
 
550
- logger.info("Annotating lead variants and generating hits summary table ...")
551
- for _, row in leads_df.iterrows():
552
- annotation = _annotate_variant(
553
- chrom=row["CHR"],
554
- pos=row["POS"],
555
- genes_dict=genes_dict,
556
- window=window,
557
- )
558
- prioritized = _annotate_and_prioritize_variant(
559
- chrom=row["CHR"],
560
- pos=row["POS"],
561
- genes_df=geneinfo,
562
- lead_snps_df=leads_df,
563
- window=window,
564
- )
551
+ logger.info("Annotating lead variants and generating hits summary table ...")
552
+ for _, row in leads_df.iterrows():
553
+ annotation = _annotate_variant(
554
+ chrom=row["CHR"],
555
+ pos=row["POS"],
556
+ genes_dict=genes_dict,
557
+ window=window,
558
+ )
559
+ prioritized = _annotate_and_prioritize_variant(
560
+ chrom=row["CHR"],
561
+ pos=row["POS"],
562
+ genes_df=geneinfo,
563
+ lead_snps_df=leads_df,
564
+ window=window,
565
+ )
565
566
 
566
- record = {
567
- **(row.to_dict()),
568
- **(annotation if annotation is not None else {}),
569
- **(prioritized if prioritized is not None else {}),
570
- }
571
- records.append(record)
567
+ record = {
568
+ **(row.to_dict()),
569
+ **(annotation if annotation is not None else {}),
570
+ **(prioritized if prioritized is not None else {}),
571
+ }
572
+ records.append(record)
572
573
 
573
- locus_table = pd.DataFrame(records).sort_values(
574
- ["CHR", "POS"], key=natsort.natsort_keygen()
575
- )
574
+ locus_table = pd.DataFrame(records).sort_values(
575
+ ["CHR", "POS"], key=natsort.natsort_keygen()
576
+ )
577
+ else:
578
+ locus_table = leads_df
576
579
 
577
580
  if table_out is not None:
578
581
  locus_table.to_csv(table_out, index=False, sep="\t", na_rep="None")
pycmplot/cli.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- CLI_MODULE = '''"""
3
+ CLI_MODULE = """
4
4
  pycmplot.cli
5
5
  ============
6
6
 
@@ -15,7 +15,7 @@ Arguments are organised into four groups:
15
15
  colours, and output format (apply to both plot modes).
16
16
  * **Circular Only** — arguments specific to ``--mode cm``.
17
17
  * **Linear Only** — arguments specific to ``--mode lm`` (default).
18
- """'''
18
+ """
19
19
 
20
20
  import argparse
21
21
  from pathlib import Path
@@ -30,7 +30,7 @@ DESCMSG = """
30
30
 
31
31
 
32
32
  def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
33
- GET_ARGUMENTS = '''"""Parse and return command-line arguments for the pycmplot entry point.
33
+ GET_ARGUMENTS = """Parse and return command-line arguments for the pycmplot entry point.
34
34
 
35
35
  Parameters
36
36
  ----------
@@ -146,8 +146,10 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
146
146
  - Description
147
147
  * - ``annotate``
148
148
  - str
149
- - Annotation content: ``'SNP'`` (rsID) or ``'GENE'`` (nearest
150
- gene symbol). Default ``'SNP'``.
149
+ - Annotation content: Annotate loci by column in hits table
150
+ ``'snp'`` (rsID), ``top_gene``, ``nearest_upstream_gene``, ``nearest_downstream_gene``, etc,
151
+ or ``'gene'`` (let the package decide one of ``top_gene`` or ``nearest_upstream_gene``).
152
+ Default ``'snp'``.
151
153
  * - ``annotation_size``
152
154
  - float
153
155
  - Font size for annotation labels. Default ``6``.
@@ -263,7 +265,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
263
265
  --------
264
266
  pycmplot._core.main :
265
267
  Consumes the :class:`~argparse.Namespace` returned by this function.
266
- """'''
268
+ """
267
269
 
268
270
  parser = argparse.ArgumentParser(
269
271
  prog="pycmplot",
@@ -293,10 +295,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
293
295
  ),
294
296
  required=True, type=str, metavar="str",
295
297
  )
296
- req.add_argument(
297
- "-b", "--build_column", required=True, type=str, metavar="str",
298
- help="Genome build column name (containing hg18/hg19/hg38)."
299
- )
298
+
300
299
 
301
300
  # ------------------------------------------------------------------
302
301
  # Optional
@@ -329,13 +328,51 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
329
328
  help="File delimiter (autodetected if omitted)."
330
329
  )
331
330
  opt.add_argument(
332
- "--logp", action="store_true",
333
- help="Plot −log₁₀(p) instead of raw p-values."
331
+ "-bc", "--build_column", required=False, type=str, metavar="str",
332
+ help=("Name of column containing genome build (hg18/hg19/hg38)."
333
+ "Or use ``--build`` below to supply genome builds per summary stat file."
334
+ ))
335
+ opt.add_argument(
336
+ "-b","--build", required=False, type=str, metavar='str',
337
+ help=
338
+ """Comma-sperated list of genome build of summary stats file(s) listed
339
+ in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
340
+ file1.txt.gz --> hg19
341
+ file2.txt.gz --> hg38
342
+ file3.tsv --> hg38 ... etc
343
+ """
334
344
  )
335
345
  opt.add_argument(
336
- "-qq", "--qq_plot", action="store_true",
337
- help="Also generate a QQ-plot."
346
+ "--logp", action="store_true",
347
+ help="Plot −log₁₀(p) instead of raw p-values."
338
348
  )
349
+ opt.add_argument("-qq", "--qq_plot", action="store_true",
350
+ help="Generate QQ-plot(s) alongside the Manhattan plot.")
351
+ opt.add_argument("-qq_sep", "--qq_separate", action="store_true",
352
+ help=(
353
+ "Save one QQ-plot file per sumstat instead of a "
354
+ "combined multi-panel figure. Only used when -qq is set."
355
+ ))
356
+ opt.add_argument("-qq_cols", "--qq_ncols", default=3, type=int, metavar="int",
357
+ help="Number of columns in the combined QQ-plot grid (default: 3).")
358
+ opt.add_argument("-qq_thin", "--qq_thin", action="store_true", default=False,
359
+ help=(
360
+ "Thin null-like p-values before QQ plotting for speed (default: off)."
361
+ "Include this flag to turn on for speed."
362
+ ))
363
+ opt.add_argument("-thin_below", "--thin_below", type=float, metavar="float", default=0.01,
364
+ help=(
365
+ "P-value threshold below which all points are always kept."
366
+ "Points above this threshold are downsampled (default: 0.01)."
367
+ ))
368
+ opt.add_argument("-qq_max_pts", "--qq_max_points", default=50000, type=int, metavar="int",
369
+ help="Max points to plot per QQ track after thinning (default: 50000).")
370
+ opt.add_argument("-qq_ov", "--qq_overlay", action="store_true",
371
+ help=(
372
+ "Plot all sumstats on a single overlaid QQ-plot, "
373
+ "each coloured by label with lambda in the legend. "
374
+ "Only used when -qq is set."
375
+ ))
339
376
  opt.add_argument(
340
377
  "-tp", "--trim_pval", type=float, metavar="float",
341
378
  help="Trim variants with p > this value before plotting."
@@ -355,11 +392,17 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
355
392
  default=None, const=1e-5, nargs="?", type=float, metavar="float",
356
393
  help="Suggestive significance threshold (default: 1e-5)."
357
394
  )
395
+
396
+ # CLASS TO HANDLE ANNOTATION VALUES NOT IN CHOICE LIST
397
+ class AllowAll(list):
398
+ def __contains__(self, item):
399
+ return True
400
+
358
401
  opt.add_argument(
359
402
  "-a", "--annotate",
360
- choices=["SNP", "GENE"], nargs="?",
361
- default="SNP", const="SNP", type=str, #metavar="str",
362
- help="Annotate significant loci by SNP ID or nearest gene."
403
+ choices=AllowAll(["snp", "gene", "top_gene", "nearest_upstream_gene", "nearest_downstream_gene"]), nargs="?",
404
+ default=None, type=str, metavar="{snp,gene,top_gene,nearest_upstream_gene,nearest_downstream_gene,...}", const="SNP",
405
+ help="Annotate loci by column name in hits table (defaults to 'snp' if provided and no value set)."
363
406
  )
364
407
  opt.add_argument(
365
408
  "-p_size", "--point_size", default=6, type=float, metavar="float",
@@ -378,7 +421,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
378
421
  help="P-value threshold for highlighting (default: 5e-8)."
379
422
  )
380
423
  opt.add_argument(
381
- "-hc", "--highight_color", default="brown", type=str, metavar="str",
424
+ "-hc", "--highlight_color", default="brown", type=str, metavar="str",
382
425
  help="Color of highlighted positions (default: brown)."
383
426
  )
384
427
  opt.add_argument(
@@ -386,7 +429,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
386
429
  help="Draw vertical dashed lines through highlighted positions."
387
430
  )
388
431
  opt.add_argument(
389
- "-hlc", "--highight_line_color", default="grey", type=str, metavar="str",
432
+ "-hlc", "--highlight_line_color", default="grey", type=str, metavar="str",
390
433
  help="Color of highlight line (default: grey)."
391
434
  )
392
435
  opt.add_argument(
@@ -444,7 +487,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
444
487
  )
445
488
  cio.add_argument(
446
489
  "-cl_side", "--chrom_label_side", choices=["inside", "outside"],
447
- nargs="?", default="inside", const="inside", type=str,
490
+ nargs="?", default=None, const="inside", type=str,
448
491
  help="Chromosome label placement (default: inside)."
449
492
  )
450
493
  cio.add_argument(
pycmplot/constants.py CHANGED
@@ -1,4 +1,4 @@
1
- CONSTANTS_MODULE = '''"""
1
+ CONSTANTS_MODULE = """
2
2
  pycmplot.constants
3
3
  ==================
4
4
 
@@ -27,7 +27,7 @@ Notes
27
27
  ``hg38_chr_lengths`` reflects the GRCh38 primary assembly (GCA_000001405).
28
28
  Values may differ slightly from builds that include alternate contigs or
29
29
  patches.
30
- """'''
30
+ """
31
31
 
32
32
  # ---------------------------------------------------------------------------
33
33
  # hg38 chromosome lengths (GRCh38)