pycmplot 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {pycmplot-0.2.2/pycmplot.egg-info → pycmplot-0.2.4}/PKG-INFO +17 -23
  2. {pycmplot-0.2.2 → pycmplot-0.2.4}/README.md +11 -22
  3. {pycmplot-0.2.2 → pycmplot-0.2.4}/docs/conf.py +1 -1
  4. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/__init__.py +1 -1
  5. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/_core.py +4 -4
  6. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/cli.py +2 -2
  7. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/io.py +33 -23
  8. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/liftover.py +11 -1
  9. {pycmplot-0.2.2 → pycmplot-0.2.4/pycmplot.egg-info}/PKG-INFO +17 -23
  10. {pycmplot-0.2.2 → pycmplot-0.2.4}/pyproject.toml +14 -2
  11. {pycmplot-0.2.2 → pycmplot-0.2.4}/setup.cfg +1 -1
  12. {pycmplot-0.2.2 → pycmplot-0.2.4}/LICENSE +0 -0
  13. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/annotation.py +0 -0
  14. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/constants.py +0 -0
  15. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
  16. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
  17. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/data/hg19ToHg38.over.chain +0 -0
  18. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/plotting/circular.py +0 -0
  19. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/plotting/linear.py +0 -0
  20. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/plotting/qq.py +0 -0
  21. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/resources.py +0 -0
  22. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot/stats.py +0 -0
  23. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot.egg-info/SOURCES.txt +0 -0
  24. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot.egg-info/dependency_links.txt +0 -0
  25. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot.egg-info/entry_points.txt +0 -0
  26. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot.egg-info/requires.txt +0 -0
  27. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot.egg-info/top_level.txt +0 -0
  28. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docs/conf.py +0 -0
  29. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docstrings_annotation.py +0 -0
  30. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docstrings_core_cli.py +0 -0
  31. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docstrings_io.py +0 -0
  32. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docstrings_liftover.py +0 -0
  33. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docstrings_plotting.py +0 -0
  34. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docstrings_resources_constants.py +0 -0
  35. {pycmplot-0.2.2 → pycmplot-0.2.4}/pycmplot_docs/docstrings_stats.py +0 -0
  36. {pycmplot-0.2.2 → pycmplot-0.2.4}/setup.py +0 -0
@@ -1,10 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
7
7
  License-Expression: CC-BY-NC-SA-4.0
8
+ Project-URL: Homepage, https://github.com/esohkevin/pycmplot
9
+ Project-URL: Issues, https://github.com/esohkevin/pycmplot/issues
10
+ Project-URL: Docs, https://pycmplot.readthedocs.io/en/latest/
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
8
13
  Requires-Python: >=3.9
9
14
  Description-Content-Type: text/markdown
10
15
  License-File: LICENSE
@@ -101,6 +106,9 @@ pip install pycmplot
101
106
  ```bash
102
107
  git clone https://github.com/esohkevin/pycmplot.git
103
108
 
109
+ # or, to get the most recent updates, clone the development branch instead:
110
+ # git clone -b dev https://github.com/esohkevin/pycmplot.git
111
+
104
112
  cd pycmplot
105
113
 
106
114
  pip install -e .
@@ -207,26 +215,12 @@ A demonstration of how to use the python API is provided in this notebook: https
207
215
 
208
216
  ---
209
217
 
210
- ## Package structure
218
+ ## Contributing
219
+
220
+ See the contributing guidelines here: https://github.com/esohkevin/pycmplot?tab=contributing-ov-file
221
+
222
+
223
+ ## Contributors
224
+
225
+ - [Kevin Esoh](https://github.com/esohkevin)
211
226
 
212
- ```
213
- pycmplot/
214
- ├── pyproject.toml
215
- ├── setup.py
216
- ├── setup.cfg
217
- ├── README.md
218
- └── pycmplot/
219
- ├── __init__.py # public API exports
220
- ├── __main__.py # python -m pycmplot
221
- ├── _core.py # main() orchestration
222
- ├── cli.py # argparse definitions
223
- ├── constants.py # chromosome lengths, biotype weights
224
- ├── resources.py # external resource path config
225
- ├── io.py # sumstat loading, delimiter detection
226
- ├── stats.py # get_lead_snps, get_highlight_snps
227
- ├── liftover.py # lazy hg19→hg38 liftover
228
- ├── annotation.py # nearest-gene annotation, hits table
229
- └── plotting/
230
- ├── __init__.py
231
- ├── linear.py # plot_linear
232
- └── circular.py # plot_circular, compute_track_radii_dict
@@ -75,6 +75,9 @@ pip install pycmplot
75
75
  ```bash
76
76
  git clone https://github.com/esohkevin/pycmplot.git
77
77
 
78
+ # or, to get the most recent updates, clone the development branch instead:
79
+ # git clone -b dev https://github.com/esohkevin/pycmplot.git
80
+
78
81
  cd pycmplot
79
82
 
80
83
  pip install -e .
@@ -181,26 +184,12 @@ A demonstration of how to use the python API is provided in this notebook: https
181
184
 
182
185
  ---
183
186
 
184
- ## Package structure
187
+ ## Contributing
188
+
189
+ See the contributing guidelines here: https://github.com/esohkevin/pycmplot?tab=contributing-ov-file
190
+
191
+
192
+ ## Contributors
193
+
194
+ - [Kevin Esoh](https://github.com/esohkevin)
185
195
 
186
- ```
187
- pycmplot/
188
- ├── pyproject.toml
189
- ├── setup.py
190
- ├── setup.cfg
191
- ├── README.md
192
- └── pycmplot/
193
- ├── __init__.py # public API exports
194
- ├── __main__.py # python -m pycmplot
195
- ├── _core.py # main() orchestration
196
- ├── cli.py # argparse definitions
197
- ├── constants.py # chromosome lengths, biotype weights
198
- ├── resources.py # external resource path config
199
- ├── io.py # sumstat loading, delimiter detection
200
- ├── stats.py # get_lead_snps, get_highlight_snps
201
- ├── liftover.py # lazy hg19→hg38 liftover
202
- ├── annotation.py # nearest-gene annotation, hits table
203
- └── plotting/
204
- ├── __init__.py
205
- ├── linear.py # plot_linear
206
- └── circular.py # plot_circular, compute_track_radii_dict
@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.abspath(".."))
12
12
  project = "pycmplot"
13
13
  copyright = "2026, Kevin Esoh"
14
14
  author = "Kevin Esoh"
15
- release = "0.2.2" # update to match PyPI version
15
+ release = "0.2.4" # update to match PyPI version
16
16
 
17
17
  # -- General configuration -----------------------------------------------------
18
18
  extensions = [
@@ -42,4 +42,4 @@ __all__ = [
42
42
  "ResourceConfig",
43
43
  ]
44
44
 
45
- __version__ = "0.2.1"
45
+ __version__ = "0.2.4"
@@ -229,13 +229,13 @@ def main() -> None:
229
229
  # ------------------------------------------------------------------
230
230
  # ANNOTATE BY
231
231
  # ------------------------------------------------------------------
232
+ label_col = 'SNP'
232
233
  if annotate:
233
- if str(annotate).upper() == "GENE":
234
+ if str(annotate).upper() == "GENE" and 'top_gene' in hits_table.columns:
234
235
  label_col = 'top_gene'
235
- elif str(annotate).upper() == "SNP":
236
- label_col = 'SNP'
237
- else:
236
+ elif label_col in hits_table.columns:
238
237
  label_col = annotate
238
+
239
239
 
240
240
  logger.info(f"Anotate by: {label_col}")
241
241
 
@@ -328,12 +328,12 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
328
328
  help="File delimiter (autodetected if omitted)."
329
329
  )
330
330
  opt.add_argument(
331
- "-bc", "--build_column", required=False, type=str, metavar="str",
331
+ "-bc", "--build_column", default=None, required=False, type=str, metavar="str",
332
332
  help=("Name of column containing genome build (hg18/hg19/hg38)."
333
333
  "Or use ``--build`` below to supply genome builds per summary stat file."
334
334
  ))
335
335
  opt.add_argument(
336
- "-b","--build", required=False, type=str, metavar='str',
336
+ "-b","--build", default=None, required=False, type=str, metavar='str',
337
337
  help=
338
338
  """Comma-sperated list of genome build of summary stats file(s) listed
339
339
  in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
@@ -39,6 +39,7 @@ import pandas as pd
39
39
  from pycmplot.stats import get_lead_snps, get_highlight_snps
40
40
  from pycmplot.annotation import get_hits_summary_table
41
41
  from pycmplot.resources import ResourceConfig, default_resources
42
+ from pycmplot.constants import hg38_chr_lengths
42
43
 
43
44
  logger = logging.getLogger(__name__)
44
45
 
@@ -563,14 +564,6 @@ def prep_pycmplot_input_info(
563
564
  snp_candidates = [c for c in snp_candidates if c]
564
565
  pvl_candidates = [c for c in pvl_candidates if c]
565
566
 
566
- bld_candidates = []
567
- if buildc:
568
- bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
569
- bld_candidates_l = [x.lower() for x in bld_candidates]
570
- bld_candidates_u = [x.upper() for x in bld_candidates]
571
- bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
572
- bld_candidates = [c for c in bld_candidates if c]
573
-
574
567
  # ------------------------------------------------------------------
575
568
  # Resolve column names per file
576
569
  # ------------------------------------------------------------------
@@ -610,21 +603,28 @@ def prep_pycmplot_input_info(
610
603
  pcol: float,
611
604
  }
612
605
 
613
- if not bld_candidates and isinstance(build, list):
606
+ # CHECK BUILD COLUMN
607
+ if buildc:
608
+ bld_candidates = buildc
609
+ else:
610
+ bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
611
+ bld_candidates_l = [x.lower() for x in bld_candidates]
612
+ bld_candidates_u = [x.upper() for x in bld_candidates]
613
+ bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
614
+ bld_candidates = [c for c in bld_candidates if c]
615
+
616
+ try:
617
+ bcol = next(c for c in hdr if c in set(bld_candidates))
618
+ except Exception:
619
+ bcol = False
620
+
621
+
622
+ if not bcol and isinstance(build, list):
614
623
  for name, fpath, build in zip(labels, sum_stats, build):
615
624
  sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep, build]
616
625
 
617
- elif bld_candidates:
626
+ elif bcol:
618
627
  for name, fpath in zip(labels, sum_stats):
619
- try:
620
- bcol = next(c for c in hdr if c in set(bld_candidates))
621
- except StopIteration as exc:
622
- sys.exit(
623
- f"Error: could not find build column in {fpath}.\n"
624
- f" Header: {hdr}\n"
625
- f" Details: {exc}"
626
- )
627
-
628
628
  old_cols = [chrom_col, pos_col, snp_col, pcol, bcol]
629
629
  new_cols = {
630
630
  chrom_col: "CHR",
@@ -635,7 +635,7 @@ def prep_pycmplot_input_info(
635
635
  }
636
636
  col_dtypes = {
637
637
  chrom_col: str,
638
- pos_col: int,
638
+ pos_col: object,
639
639
  snp_col: str,
640
640
  pcol: float,
641
641
  bcol: str,
@@ -643,7 +643,7 @@ def prep_pycmplot_input_info(
643
643
 
644
644
  sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep]
645
645
  else:
646
- logger.warning("""No build column or builds specified. Summary stats will be
646
+ logger.warning("""No build column or builds detected. Summary stats will be
647
647
  plotted in their respective coordinate systems.
648
648
  If your data are in different coordinate systems, putting them in one plot
649
649
  is not advisable, especially if ``--annotate`` is set!""")
@@ -682,6 +682,7 @@ def get_sumstats_and_merged_sector_list(
682
682
  signif_line: Optional[float] = None,
683
683
  suggest_threshold: Optional[float] = None,
684
684
  resources: Optional[ResourceConfig] = None,
685
+ hg38_chr_lengths = hg38_chr_lengths,
685
686
  ):
686
687
  GET_SUMSTATS = """Load summary statistics, run liftover, extract lead SNPs, and compute
687
688
  merged Circos sector sizes.
@@ -824,7 +825,7 @@ def get_sumstats_and_merged_sector_list(
824
825
  ).rename(columns=sumstat_newcols)
825
826
 
826
827
  # Get dict of p-values for qq-plotting before applying trim_pval
827
- logger.info("Extracting raw p-values for qq-plotting ...")
828
+ logger.info("Extracting raw p-values for QQ-plotting ...")
828
829
  pval_dict[label] = df["P"].dropna().astype(float).values
829
830
 
830
831
 
@@ -834,7 +835,7 @@ def get_sumstats_and_merged_sector_list(
834
835
 
835
836
  # Trim insignificant variants for faster plotting
836
837
  if trim_pval:
837
- logger.info("Excluding variants with p-value less than %s ...", trim_pval)
838
+ logger.info("Excluding variants with p-value less than %s to speed up Manhattan plotting ...", trim_pval)
838
839
  df = df[df["P"].astype(float) <= float(trim_pval)]
839
840
  else:
840
841
  df = df[df["P"].astype(float) <= 1]
@@ -863,6 +864,7 @@ def get_sumstats_and_merged_sector_list(
863
864
  if "BUILD" in df.columns and "hg19" in df["BUILD"].unique():
864
865
  logger.info("Converting hg19 coordinates to hg38 ...")
865
866
  sumstats_loaded[label][0] = liftover_position(df, resources=resources)
867
+ liftover = True
866
868
 
867
869
  # Lead SNPs
868
870
  logger.info("Extracting variants to highlight ...")
@@ -959,6 +961,14 @@ def get_sumstats_and_merged_sector_list(
959
961
  sub = assoc[assoc["CHR"] == chrom]
960
962
  lo_val = max(sub["POS"].min() - 1_000_000, 0)
961
963
  hi_val = sub["POS"].max()
964
+
965
+ # Ensure sector sizes are within chrom ranges if liftover
966
+ #chrom_max = hi_val
967
+ #if liftover:
968
+ # hg38_chr_lengths = {k.replace("chr",""): v for k, v in hg38_chr_lengths.items()}
969
+ # chrom_max = hg38_chr_lengths[chrom]
970
+ #hi_val = min(hi_val, chrom_max)
971
+
962
972
  assoc_dic[str(chrom)] = [lo_val, hi_val]
963
973
 
964
974
  min_dic_val = min(assoc_dic.values())
@@ -30,6 +30,7 @@ import numpy as np
30
30
  import pandas as pd
31
31
 
32
32
  from pycmplot.resources import ResourceConfig, default_resources
33
+ from pycmplot.constants import hg38_chr_lengths
33
34
 
34
35
  logger = logging.getLogger(__name__)
35
36
 
@@ -136,6 +137,7 @@ def liftover_hg19_to_hg38(
136
137
 
137
138
  def liftover_position(
138
139
  df: pd.DataFrame,
140
+ hg38_chr_lengths = hg38_chr_lengths,
139
141
  resources: Optional[ResourceConfig] = None,
140
142
  ) -> pd.DataFrame:
141
143
  LIFTOVER_POSITION = """Liftover all hg19 rows in *df* from hg19 to hg38 coordinates.
@@ -190,6 +192,8 @@ def liftover_position(
190
192
  True
191
193
  """
192
194
 
195
+ hg38_chr_lengths = {k.replace("chr",""): v for k, v in hg38_chr_lengths.items()}
196
+
193
197
  if resources is None:
194
198
  resources = default_resources
195
199
 
@@ -208,4 +212,10 @@ def liftover_position(
208
212
  df["BUILD"] = "hg38"
209
213
  df["POS"] = new_positions
210
214
  df["POS"] = df["POS"].fillna(0).astype(int)
211
- return df[df["POS"] != 0]
215
+ clean_df = []
216
+ for chrom in df["CHR"].unique():
217
+ chr_df = df[df["CHR"] == chrom]
218
+ chr_df = chr_df[chr_df["POS"] <= hg38_chr_lengths[chrom]]
219
+ clean_df = pd.concat([pd.DataFrame(clean_df), pd.DataFrame(chr_df)], axis = 0, ignore_index=True)
220
+
221
+ return clean_df[clean_df["POS"] != 0]
@@ -1,10 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
7
7
  License-Expression: CC-BY-NC-SA-4.0
8
+ Project-URL: Homepage, https://github.com/esohkevin/pycmplot
9
+ Project-URL: Issues, https://github.com/esohkevin/pycmplot/issues
10
+ Project-URL: Docs, https://pycmplot.readthedocs.io/en/latest/
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
8
13
  Requires-Python: >=3.9
9
14
  Description-Content-Type: text/markdown
10
15
  License-File: LICENSE
@@ -101,6 +106,9 @@ pip install pycmplot
101
106
  ```bash
102
107
  git clone https://github.com/esohkevin/pycmplot.git
103
108
 
109
+ # or, to get the most recent updates, clone the development branch instead:
110
+ # git clone -b dev https://github.com/esohkevin/pycmplot.git
111
+
104
112
  cd pycmplot
105
113
 
106
114
  pip install -e .
@@ -207,26 +215,12 @@ A demonstration of how to use the python API is provided in this notebook: https
207
215
 
208
216
  ---
209
217
 
210
- ## Package structure
218
+ ## Contributing
219
+
220
+ See the contributing guidelines here: https://github.com/esohkevin/pycmplot?tab=contributing-ov-file
221
+
222
+
223
+ ## Contributors
224
+
225
+ - [Kevin Esoh](https://github.com/esohkevin)
211
226
 
212
- ```
213
- pycmplot/
214
- ├── pyproject.toml
215
- ├── setup.py
216
- ├── setup.cfg
217
- ├── README.md
218
- └── pycmplot/
219
- ├── __init__.py # public API exports
220
- ├── __main__.py # python -m pycmplot
221
- ├── _core.py # main() orchestration
222
- ├── cli.py # argparse definitions
223
- ├── constants.py # chromosome lengths, biotype weights
224
- ├── resources.py # external resource path config
225
- ├── io.py # sumstat loading, delimiter detection
226
- ├── stats.py # get_lead_snps, get_highlight_snps
227
- ├── liftover.py # lazy hg19→hg38 liftover
228
- ├── annotation.py # nearest-gene annotation, hits table
229
- └── plotting/
230
- ├── __init__.py
231
- ├── linear.py # plot_linear
232
- └── circular.py # plot_circular, compute_track_radii_dict
@@ -4,13 +4,19 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pycmplot"
7
- version = "0.2.2"
7
+ version = "0.2.4"
8
8
  description = "Multi-track circular and linear Manhattan plot generation for GWAS summary statistics"
9
9
  readme = "README.md"
10
10
  license = "CC-BY-NC-SA-4.0"
11
+ license-files = ["LICEN[CS]E*"]
11
12
  authors = [{ name = "Kevin Esoh", email = "kesohku1@jh.edu" }]
12
13
  requires-python = ">=3.9"
13
14
 
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+
14
20
  dependencies = [
15
21
  "pandas>=1.5",
16
22
  "numpy>=1.23",
@@ -25,6 +31,12 @@ dependencies = [
25
31
  [project.optional-dependencies]
26
32
  dev = ["pytest", "black", "ruff", "towncrier", "sphinx"]
27
33
 
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/esohkevin/pycmplot"
37
+ Issues = "https://github.com/esohkevin/pycmplot/issues"
38
+ Docs = "https://pycmplot.readthedocs.io/en/latest/"
39
+
28
40
  [tool.setuptools.package-data]
29
41
  pycmplot = ["data/*"]
30
42
 
@@ -36,7 +48,7 @@ where = ["."]
36
48
  exclude = [
37
49
  "dist*", "changelog.d*", "build-pypi-package.md",
38
50
  "pycmplot_python_api.ipynb", "mit-license.txt",
39
- "pycmplot_docs", "docs"
51
+ "pycmplot_docs", "docs", "CHANGELOG.rst"
40
52
  ]
41
53
 
42
54
 
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = pycmplot
3
- version = 0.2.2
3
+ version = 0.2.4
4
4
  author = Kevin Esoh
5
5
  author_email = kesohku1@jh.edu
6
6
  description = Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
File without changes
File without changes
File without changes
File without changes
File without changes