pycmplot 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. pycmplot-0.2.6/LICENSE +21 -0
  2. {pycmplot-0.2.4/pycmplot.egg-info → pycmplot-0.2.6}/PKG-INFO +10 -2
  3. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/__init__.py +7 -2
  4. pycmplot-0.2.6/pycmplot/__main__.py +6 -0
  5. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/_core.py +43 -50
  6. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/annotation.py +59 -38
  7. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/cli.py +166 -229
  8. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/constants.py +1 -1
  9. pycmplot-0.2.6/pycmplot/data/hg18ToHg38.over.chain.gz +0 -0
  10. pycmplot-0.2.6/pycmplot/data/hg19ToHg38.over.chain.gz +0 -0
  11. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/io.py +214 -124
  12. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/liftover.py +108 -26
  13. pycmplot-0.2.6/pycmplot/plotting/__init__.py +31 -0
  14. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/plotting/circular.py +31 -32
  15. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/plotting/linear.py +87 -39
  16. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/plotting/qq.py +50 -3
  17. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/resources.py +44 -36
  18. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/stats.py +6 -6
  19. {pycmplot-0.2.4 → pycmplot-0.2.6/pycmplot.egg-info}/PKG-INFO +10 -2
  20. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot.egg-info/SOURCES.txt +5 -11
  21. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot.egg-info/top_level.txt +1 -1
  22. {pycmplot-0.2.4 → pycmplot-0.2.6}/pyproject.toml +11 -3
  23. {pycmplot-0.2.4 → pycmplot-0.2.6}/setup.cfg +1 -1
  24. pycmplot-0.2.4/LICENSE +0 -441
  25. pycmplot-0.2.4/docs/conf.py +0 -91
  26. pycmplot-0.2.4/pycmplot/data/hg19ToHg38.over.chain +0 -56506
  27. pycmplot-0.2.4/pycmplot_docs/docs/conf.py +0 -91
  28. pycmplot-0.2.4/pycmplot_docs/docstrings_annotation.py +0 -289
  29. pycmplot-0.2.4/pycmplot_docs/docstrings_core_cli.py +0 -347
  30. pycmplot-0.2.4/pycmplot_docs/docstrings_io.py +0 -468
  31. pycmplot-0.2.4/pycmplot_docs/docstrings_liftover.py +0 -156
  32. pycmplot-0.2.4/pycmplot_docs/docstrings_plotting.py +0 -587
  33. pycmplot-0.2.4/pycmplot_docs/docstrings_resources_constants.py +0 -170
  34. pycmplot-0.2.4/pycmplot_docs/docstrings_stats.py +0 -135
  35. {pycmplot-0.2.4 → pycmplot-0.2.6}/README.md +0 -0
  36. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
  37. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
  38. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot.egg-info/dependency_links.txt +0 -0
  39. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot.egg-info/entry_points.txt +0 -0
  40. {pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot.egg-info/requires.txt +0 -0
  41. {pycmplot-0.2.4 → pycmplot-0.2.6}/setup.py +0 -0
pycmplot-0.2.6/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kevin Esoh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,15 +1,23 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
7
- License-Expression: CC-BY-NC-SA-4.0
7
+ License-Expression: MIT
8
8
  Project-URL: Homepage, https://github.com/esohkevin/pycmplot
9
9
  Project-URL: Issues, https://github.com/esohkevin/pycmplot/issues
10
10
  Project-URL: Docs, https://pycmplot.readthedocs.io/en/latest/
11
11
  Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
12
16
  Classifier: Operating System :: OS Independent
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Natural Language :: English
19
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
20
+ Classifier: Topic :: Scientific/Engineering :: Visualization
13
21
  Requires-Python: >=3.9
14
22
  Description-Content-Type: text/markdown
15
23
  License-File: LICENSE
@@ -12,7 +12,7 @@ Command-line::
12
12
  Python API::
13
13
 
14
14
  from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
15
- from pycmplot.plotting import plot_linear, plot_circular
15
+ from pycmplot.plotting import plot_linear, plot_circular, plot_qq_single, plot_qq_separate, plot_qq_overlay, plot_qq_combined
16
16
  from pycmplot.stats import get_lead_snps
17
17
  from pycmplot.annotation import get_hits_summary_table
18
18
 
@@ -22,6 +22,7 @@ Public surface
22
22
 
23
23
  from pycmplot.plotting.linear import plot_linear
24
24
  from pycmplot.plotting.circular import plot_circular, compute_track_radii_dict
25
+ from pycmplot.plotting.qq import plot_qq_single, plot_qq_separate, plot_qq_overlay, plot_qq_combined
25
26
  from pycmplot.stats import get_lead_snps, get_highlight_snps
26
27
  from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
27
28
  from pycmplot.annotation import get_hits_summary_table
@@ -31,6 +32,10 @@ from pycmplot.resources import ResourceConfig
31
32
  __all__ = [
32
33
  "plot_linear",
33
34
  "plot_circular",
35
+ "plot_qq_single",
36
+ "plot_qq_separate",
37
+ "plot_qq_overlay",
38
+ "plot_qq_combined",
34
39
  "compute_track_radii_dict",
35
40
  "get_lead_snps",
36
41
  "get_highlight_snps",
@@ -42,4 +47,4 @@ __all__ = [
42
47
  "ResourceConfig",
43
48
  ]
44
49
 
45
- __version__ = "0.2.4"
50
+ __version__ = "0.2.6"
@@ -0,0 +1,6 @@
1
+ """Entry point for ``python -m pycmplot`` invocation."""
2
+
3
+ from pycmplot._core import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -1,62 +1,62 @@
1
- from __future__ import annotations
2
-
3
- CORE_MODULE = """
1
+ """
4
2
  pycmplot._core
5
3
  ==============
6
4
 
7
5
  Main entry point that orchestrates CLI argument parsing, data loading, and
8
6
  plot dispatch. This module is intentionally thin: it delegates all heavy
9
- work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`, and
10
- :mod:`pycmplot.plotting.circular`.
7
+ work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`,
8
+ :mod:`pycmplot.plotting.circular`, and :mod:`pycmplot.plotting.qq`.
11
9
 
12
10
  All imports are deferred inside :func:`main` so that
13
11
  ``import pycmplot`` remains fast regardless of the size of the dependency
14
12
  tree.
15
13
  """
16
14
 
15
+ from __future__ import annotations
16
+
17
17
  import logging
18
18
  import warnings
19
+ import sys
19
20
 
20
21
  # Suppress noisy font-manager warnings before any matplotlib import
21
22
  logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
22
23
  warnings.filterwarnings("ignore")
23
24
 
24
- logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
25
+ logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s", stream=sys.stdout)
25
26
  logger = logging.getLogger(__name__)
26
27
 
27
28
 
28
29
  def main() -> None:
29
- MAIN = """Orchestrate the full pycmplot pipeline from the command line.
30
+ """Orchestrate the full pycmplot pipeline from the command line.
30
31
 
31
32
  This function is registered as the ``pycmplot`` console-script entry point
32
33
  in ``pyproject.toml`` / ``setup.cfg``. It performs the following steps in
33
34
  order:
34
35
 
35
36
  1. **Parse CLI arguments** via :func:`~pycmplot.cli.get_arguments`.
36
- 2. **Parse comma-separated inputs** (files, labels, colours, track heights)
37
- into Python lists via
38
- :func:`~pycmplot.io.strip_comma_separated_input_streams`.
37
+ 2. **Parse comma-separated inputs** (files, labels, colours, track heights,
38
+ builds) into Python lists via
39
+ :func:`~pycmplot.io.strip_comma_separated_input_streams`.
39
40
  3. **Construct output paths** (plot image and locus summary table TSV) via
40
- :func:`~pycmplot.io.get_output_paths`.
41
+ :func:`~pycmplot.io.get_output_paths`.
41
42
  4. **Resolve column names** for every input file via
42
- :func:`~pycmplot.io.prep_pycmplot_input_info`.
43
+ :func:`~pycmplot.io.prep_pycmplot_input_info`.
43
44
  5. **Load data** — reads summary statistics, normalises chromosome names,
44
- runs hg19 → hg38 liftover if needed, extracts lead SNPs, generates the
45
- hits summary table, and computes merged Circos sector sizes via
46
- :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`.
47
- 6. **Dispatch plotting** — calls
48
- :func:`~pycmplot.plotting.circular.plot_circular` when ``--mode cm``,
49
- or :func:`~pycmplot.plotting.linear.plot_linear` otherwise.
50
-
51
- Parameters
52
- ----------
53
- None
54
- All input is taken from ``sys.argv`` via :mod:`argparse`.
45
+ runs hg19 → hg38 liftover if needed, extracts lead SNPs, generates the
46
+ hits summary table, and computes merged Circos sector sizes via
47
+ :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`.
48
+ 6. **Dispatch Manhattan plot** — calls
49
+ :func:`~pycmplot.plotting.circular.plot_circular` when ``--mode cm``,
50
+ or :func:`~pycmplot.plotting.linear.plot_linear` otherwise.
51
+ 7. **Optional QQ plot** — when ``--qq_plot`` is set, dispatches to one of
52
+ :func:`~pycmplot.plotting.qq.plot_qq_combined` (default),
53
+ :func:`~pycmplot.plotting.qq.plot_qq_separate` (``--qq_separate``), or
54
+ :func:`~pycmplot.plotting.qq.plot_qq_overlay` (``--qq_overlay``).
55
55
 
56
56
  Returns
57
57
  -------
58
58
  None
59
- Saves the plot image and locus summary table to the directory
59
+ Saves the plot image(s) and locus summary table to the directory
60
60
  specified by ``--output_dir``.
61
61
 
62
62
  Raises
@@ -94,6 +94,7 @@ def main() -> None:
94
94
  from pycmplot.plotting.circular import plot_circular
95
95
  from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
96
96
  from pycmplot.resources import ResourceConfig
97
+ from pycmplot.annotation import get_annotation_column
97
98
 
98
99
  # ------------------------------------------------------------------
99
100
  # Parse CLI
@@ -147,7 +148,9 @@ def main() -> None:
147
148
  track_heights = args.track_heights
148
149
  linear_track_spacing = args.linear_track_spacing
149
150
  no_track_labels = args.no_track_labels
151
+ ylabel = args.ylabel
150
152
  chr_spacing = args.chr_spacing
153
+ figure_size = args.figure_size
151
154
 
152
155
 
153
156
  # ------------------------------------------------------------------
@@ -194,8 +197,8 @@ def main() -> None:
194
197
  pos = pos_arg,
195
198
  snp = snp_arg,
196
199
  pcol = pcol_arg,
197
- buildc = buildc_arg,
198
- build = builds
200
+ build_column = buildc_arg,
201
+ build_list = builds
199
202
  )
200
203
 
201
204
  # ------------------------------------------------------------------
@@ -206,38 +209,25 @@ def main() -> None:
206
209
  # ------------------------------------------------------------------
207
210
  # Load data, compute sectors, get hits table
208
211
  # ------------------------------------------------------------------
209
- (
210
- merged_assoc_sector_sizes,
211
- sumstats_loaded,
212
- hits_table,
213
- signif_lines,
214
- pval_dict,
215
- ) = get_sumstats_and_merged_sector_list(
212
+ pycmplot_dict = get_sumstats_and_merged_sector_list(
216
213
  sum_stats=sum_stats,
217
214
  labels=labels,
218
215
  trim_pval=trim_pval,
219
216
  logp=logp,
220
217
  file_info=sumstats_hdr_dic,
221
218
  sort_tracks=sort_track,
222
- table_out=table_out,
219
+ table_out=plt_base,
223
220
  signif_threshold=signif_threshold,
224
221
  signif_line=signif_line,
225
222
  suggest_threshold=suggest_threshold,
226
223
  resources=resources,
227
224
  )
228
225
 
229
- # ------------------------------------------------------------------
230
- # ANNOTATE BY
231
- # ------------------------------------------------------------------
232
- label_col = 'SNP'
233
- if annotate:
234
- if str(annotate).upper() == "GENE" and 'top_gene' in hits_table.columns:
235
- label_col = 'top_gene'
236
- elif label_col in hits_table.columns:
237
- label_col = annotate
238
-
239
-
240
- logger.info(f"Anotate by: {label_col}")
226
+ merged_assoc_sector_sizes = pycmplot_dict["sectors"]
227
+ sumstats_loaded = pycmplot_dict["dfs"]
228
+ hits_table = pycmplot_dict["annot"]
229
+ signif_lines = pycmplot_dict["lines"]
230
+ pval_dict = pycmplot_dict["pvals"]
241
231
 
242
232
  # ------------------------------------------------------------------
243
233
  # CIRCULAR MANHATTAN
@@ -260,7 +250,6 @@ def main() -> None:
260
250
  track_label_size = track_label_size,
261
251
  track_label_orientation = track_label_orientation,
262
252
  annotate = annotate,
263
- label_col = label_col if annotate else None,
264
253
  annotation_size = annotation_size,
265
254
  hits_table = hits_table,
266
255
  sector_sizes = merged_assoc_sector_sizes,
@@ -280,6 +269,9 @@ def main() -> None:
280
269
  # ------------------------------------------------------------------
281
270
  else:
282
271
  logger.info("Generating LINEAR MANHATTAN Plot ...")
272
+ fsize = figure_size.strip(" ").split(",")
273
+ fsize = [int(v) for v in fsize]
274
+ logger.info(f"FIGURE SIZE: {fsize}")
283
275
  plot_linear(
284
276
  sumstats_loaded=sumstats_loaded,
285
277
  track_heights=t_heights,
@@ -291,19 +283,19 @@ def main() -> None:
291
283
  highlight_color=highlight_color,
292
284
  highlight_line=highlight_line,
293
285
  highlight_line_color=highlight_line_color,
294
- annotate=annotate,
286
+ annotate=annotate,
295
287
  hits_table=hits_table if not hits_table.empty else None,
296
- label_col=label_col if annotate else None,
297
288
  chr_spacing=chr_spacing,
298
289
  linear_track_spacing=linear_track_spacing,
299
290
  colors=colors,
300
291
  signif_lines=signif_lines,
301
292
  plot_title=plot_title,
302
293
  no_track_labels=no_track_labels,
294
+ ylabel=ylabel,
303
295
  dpi=dpi,
304
296
  output_format=output_format,
305
297
  output_dir=output_dir,
306
- figsize=(15, 9)
298
+ figsize=fsize
307
299
  )
308
300
 
309
301
  # ------------------------------------------------------------------
@@ -316,6 +308,7 @@ def main() -> None:
316
308
  if qq_separate:
317
309
  plot_qq_separate(
318
310
  pval_dict=pval_dict,
311
+ base_name=plot_title,
319
312
  thin=qq_thin,
320
313
  thin_below=thin_below,
321
314
  max_points=qq_max_points,
@@ -1,6 +1,4 @@
1
- from __future__ import annotations
2
-
3
- MODULE_DOCSTRING = """
1
+ """
4
2
  pycmplot.annotation
5
3
  ====================
6
4
 
@@ -22,6 +20,8 @@ paths can be supplied via the ``PYCMPLOT_GENEINFO_HG38`` /
22
20
  ``PYCMPLOT_GENEINFO_HG19`` environment variables.
23
21
  """
24
22
 
23
+ from __future__ import annotations
24
+
25
25
  import bisect
26
26
  import logging
27
27
  from typing import Optional
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
41
41
  # ---------------------------------------------------------------------------
42
42
 
43
43
  def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
44
- BUILD_GENES_DICT = """Build a chromosome-keyed interval dictionary with sorted start positions.
44
+ """Build a chromosome-keyed interval dictionary with sorted start positions.
45
45
 
46
46
  Pre-processes the gene reference DataFrame into a structure that supports
47
47
  efficient O(log N) binary-search lookup of genes near a query position.
@@ -98,7 +98,7 @@ def _annotate_variant(
98
98
  window: int = 500_000,
99
99
  promoter_window: int = 2_000,
100
100
  ) -> dict:
101
- ANNOTATE_VARIANT = """Return strand-aware nearest-gene annotation for a single variant.
101
+ """Return strand-aware nearest-gene annotation for a single variant.
102
102
 
103
103
  Searches the pre-built *genes_dict* within *window* bp of *pos* on
104
104
  *chrom*. Reports the nearest upstream and downstream genes (relative to
@@ -238,8 +238,7 @@ def _annotate_and_prioritize_variant(
238
238
  promoter_window: int = 2_000,
239
239
  biotype_weights: Optional[dict] = None,
240
240
  ) -> Optional[dict]:
241
- ANNOTATE_PRIORITIZE = """Score and rank candidate genes for a single variant using a composite
242
- priority metric.
241
+ """Score and rank candidate genes for a single variant using a composite priority metric.
243
242
 
244
243
  Builds a candidate gene set within *window* bp of *pos* on *chrom*, then
245
244
  scores each candidate on four additive components:
@@ -386,7 +385,7 @@ def _annotate_and_prioritize_variant(
386
385
  # ---------------------------------------------------------------------------
387
386
 
388
387
  def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
389
- CLUMP_BY_DISTANCE = """Reduce a lead-SNP table to one representative SNP per locus.
388
+ """Reduce a lead-SNP table to one representative SNP per locus.
390
389
 
391
390
  Applies greedy distance-based clumping within each chromosome group,
392
391
  starting from the most significant SNP (lowest ``P`` or highest ``logP``).
@@ -438,7 +437,7 @@ def get_hits_summary_table(
438
437
  table_out: Optional[str] = None,
439
438
  resources: Optional[ResourceConfig] = None,
440
439
  ) -> pd.DataFrame:
441
- GET_HITS_SUMMARY_TABLE = """Annotate lead SNPs with nearest genes and write the locus summary table.
440
+ """Annotate lead SNPs with nearest genes and write the locus summary table.
442
441
 
443
442
  For each lead SNP in *leads_df*, runs two complementary annotation passes:
444
443
 
@@ -475,33 +474,21 @@ def get_hits_summary_table(
475
474
  Clumped locus summary table. Contains all columns from *leads_df*
476
475
  plus annotation fields from both passes, including:
477
476
 
478
- .. list-table::
479
- :widths: 30 70
480
- :header-rows: 1
481
-
482
- * - Column
483
- - Description
484
- * - ``genic``
485
- - ``True`` when the lead SNP overlaps a gene body
486
- * - ``nearest_upstream_gene``
487
- - Nearest upstream gene symbol (strand-aware)
488
- * - ``upstream_distance``
489
- - Distance to ``nearest_upstream_gene`` in bp
490
- * - ``nearest_downstream_gene``
491
- - Nearest downstream gene symbol (strand-aware)
492
- * - ``downstream_distance``
493
- - Distance to ``nearest_downstream_gene`` in bp
494
- * - ``promoter_upstream_flag``
495
- - ``True`` when the SNP is within 2 kb upstream of a TSS
496
- * - ``gene_density``
497
- - Number of genes within the search window
498
- * - ``top_gene``
499
- - Top-priority gene from the scoring pass
500
- * - ``biotype``
501
- - Ensembl biotype of ``top_gene`` (``'intergenic'`` when no
502
- genic overlap)
503
- * - ``priority_score``
504
- - Composite priority score (genic hits only)
477
+ - ``genic`` — ``True`` when the lead SNP overlaps a gene body.
478
+ - ``nearest_upstream_gene`` — nearest upstream gene symbol
479
+ (strand-aware).
480
+ - ``upstream_distance`` — distance to ``nearest_upstream_gene`` in bp.
481
+ - ``nearest_downstream_gene`` — nearest downstream gene symbol
482
+ (strand-aware).
483
+ - ``downstream_distance`` — distance to ``nearest_downstream_gene`` in
484
+ bp.
485
+ - ``promoter_upstream_flag`` — ``True`` when the SNP is within 2 kb
486
+ upstream of a TSS.
487
+ - ``gene_density`` — number of genes within the search window.
488
+ - ``top_gene`` — top-priority gene from the scoring pass.
489
+ - ``biotype`` — Ensembl biotype of ``top_gene`` (``'intergenic'`` when
490
+ no genic overlap).
491
+ - ``priority_score`` — composite priority score (genic hits only).
505
492
 
506
493
  Notes
507
494
  -----
@@ -578,7 +565,41 @@ def get_hits_summary_table(
578
565
  locus_table = leads_df
579
566
 
580
567
  if table_out is not None:
581
- locus_table.to_csv(table_out, index=False, sep="\t", na_rep="None")
582
- logger.info("Locus summary written to: %s", table_out)
568
+ outpath = table_out.replace(" ", "_").lower() + '.tsv'
569
+ locus_table.to_csv(outpath, index=False, sep="\t", na_rep="None")
570
+ logger.info("Locus summary written to: %s", outpath)
583
571
 
584
572
  return _clump_by_distance(locus_table, window_kb=window_kb)
573
+
574
+
575
def get_annotation_column(
    annotate: Optional[str] = None,
    hits_table: Optional[pd.DataFrame] = None,
    label_col: Optional[str] = None,
) -> Optional[str]:
    """Resolve which column of *hits_table* supplies the annotation labels.

    The column is chosen in this order:

    1. *label_col*, when it is given and is a column of *hits_table*.
    2. *annotate*, when it is itself a column of *hits_table*.
    3. When *annotate* is ``"gene"`` (case-insensitive), a gene column picked
       from the ``genic`` flag: ``"nearest_upstream_gene"`` when the flag is
       truthy, ``"top_gene"`` otherwise.
    4. ``"SNP"`` as the fallback when none of the above applies.

    Parameters
    ----------
    annotate
        Requested annotation; either a column name or the keyword ``"gene"``.
        A falsy value means no annotation was requested.
    hits_table
        Locus summary table to look the column up in. ``None`` and an empty
        table are both treated as "nothing to look up".
    label_col
        Explicit column name that takes precedence over *annotate*.

    Returns
    -------
    str or None
        Name of the column to annotate by, or ``None`` when *annotate* is
        falsy.
    """
    fallback = "SNP"

    # No annotation requested: there is no column to resolve.
    if not annotate:
        return None

    # Guard the None default as well as an empty table; previously the former
    # raised AttributeError and the latter left the result unassigned.
    if hits_table is None or hits_table.empty:
        logger.warning(
            "Hits table is empty; falling back to '%s' for annotation.",
            fallback,
        )
        return fallback

    columns = hits_table.columns
    chosen = fallback
    resolved = True

    if label_col is not None and label_col in columns:
        chosen = label_col
    elif annotate in columns:
        chosen = annotate
    elif str(annotate).upper() == "GENE":
        # NOTE(review): a single column is returned for the whole table, and
        # the previous row-by-row loop let the last row overwrite all earlier
        # ones. Reading the last row directly keeps that outcome without the
        # per-row iteration and logging -- confirm this is the intended rule.
        try:
            is_genic = bool(hits_table["genic"].iloc[-1])
        except (KeyError, TypeError, ValueError):
            # Missing 'genic' column, or a value with no truth value
            # (e.g. pd.NA, or duplicated 'genic' columns).
            resolved = False
        else:
            if is_genic:
                chosen = "nearest_upstream_gene"
                logger.info("%s", "'POS' is genic")
            else:
                chosen = "top_gene"
                logger.info("%s", "'POS' is not genic")
    else:
        resolved = False

    if not resolved:
        logger.warning(
            "Annotation columns '%s' and '%s' not found in hits table: %s; "
            "falling back to 'SNP'.", annotate, label_col, columns.values,
        )

    logger.info("Annotating by: %s", chosen)

    return chosen