pycmplot 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycmplot/__init__.py CHANGED
@@ -42,4 +42,4 @@ __all__ = [
42
42
  "ResourceConfig",
43
43
  ]
44
44
 
45
- __version__ = "0.2.3"
45
+ __version__ = "0.2.4"
pycmplot/cli.py CHANGED
@@ -328,12 +328,12 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
328
328
  help="File delimiter (autodetected if omitted)."
329
329
  )
330
330
  opt.add_argument(
331
- "-bc", "--build_column", required=False, type=str, metavar="str",
331
+ "-bc", "--build_column", default=None, required=False, type=str, metavar="str",
332
332
  help=("Name of column containing genome build (hg18/hg19/hg38)."
333
333
  "Or use ``--build`` below to supply genome builds per summary stat file."
334
334
  ))
335
335
  opt.add_argument(
336
- "-b","--build", required=False, type=str, metavar='str',
336
+ "-b","--build", default=None, required=False, type=str, metavar='str',
337
337
  help=
338
338
  """Comma-sperated list of genome build of summary stats file(s) listed
339
339
  in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
pycmplot/io.py CHANGED
@@ -564,15 +564,6 @@ def prep_pycmplot_input_info(
564
564
  snp_candidates = [c for c in snp_candidates if c]
565
565
  pvl_candidates = [c for c in pvl_candidates if c]
566
566
 
567
- #if buildc:
568
- bld_candidates = buildc
569
- if not bld_candidates:
570
- bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
571
- bld_candidates_l = [x.lower() for x in bld_candidates]
572
- bld_candidates_u = [x.upper() for x in bld_candidates]
573
- bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
574
- bld_candidates = [c for c in bld_candidates if c]
575
-
576
567
  # ------------------------------------------------------------------
577
568
  # Resolve column names per file
578
569
  # ------------------------------------------------------------------
@@ -612,21 +603,28 @@ def prep_pycmplot_input_info(
612
603
  pcol: float,
613
604
  }
614
605
 
615
- if not bld_candidates and isinstance(build, list):
606
+ # CHECK BUILD COLUMN
607
+ if buildc:
608
+ bld_candidates = buildc
609
+ else:
610
+ bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
611
+ bld_candidates_l = [x.lower() for x in bld_candidates]
612
+ bld_candidates_u = [x.upper() for x in bld_candidates]
613
+ bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
614
+ bld_candidates = [c for c in bld_candidates if c]
615
+
616
+ try:
617
+ bcol = next(c for c in hdr if c in set(bld_candidates))
618
+ except Exception:
619
+ bcol = False
620
+
621
+
622
+ if not bcol and isinstance(build, list):
616
623
  for name, fpath, build in zip(labels, sum_stats, build):
617
624
  sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep, build]
618
625
 
619
- elif bld_candidates:
626
+ elif bcol:
620
627
  for name, fpath in zip(labels, sum_stats):
621
- try:
622
- bcol = next(c for c in hdr if c in set(bld_candidates))
623
- except StopIteration as exc:
624
- sys.exit(
625
- f"Error: could not find build column in {fpath}.\n"
626
- f" Header: {hdr}\n"
627
- f" Details: {exc}"
628
- )
629
-
630
628
  old_cols = [chrom_col, pos_col, snp_col, pcol, bcol]
631
629
  new_cols = {
632
630
  chrom_col: "CHR",
@@ -645,7 +643,7 @@ def prep_pycmplot_input_info(
645
643
 
646
644
  sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep]
647
645
  else:
648
- logger.warning("""No build column or builds specified. Summary stats will be
646
+ logger.warning("""No build column or builds detected. Summary stats will be
649
647
  plotted in their respective coordinate systems.
650
648
  If your data are in different coordinate systems, putting them in one plot
651
649
  is not advisable, especially if ``--annotate`` is set!""")
@@ -963,15 +961,14 @@ def get_sumstats_and_merged_sector_list(
963
961
  sub = assoc[assoc["CHR"] == chrom]
964
962
  lo_val = max(sub["POS"].min() - 1_000_000, 0)
965
963
  hi_val = sub["POS"].max()
966
- chrom_max = hi_val
967
964
 
968
965
  # Ensure sector sizes are within chrom ranges if liftover
969
- if liftover:
970
- logger.info("Limiting sector sizes to chromosome ranges for liftover sumstats ...")
971
- hg38_chr_lengths = {k.replace("chr",""): v for k, v in hg38_chr_lengths.items()}
972
- chrom_max = hg38_chr_lengths[chrom]
966
+ #chrom_max = hi_val
967
+ #if liftover:
968
+ # hg38_chr_lengths = {k.replace("chr",""): v for k, v in hg38_chr_lengths.items()}
969
+ # chrom_max = hg38_chr_lengths[chrom]
970
+ #hi_val = min(hi_val, chrom_max)
973
971
 
974
- hi_val = min(hi_val, chrom_max)
975
972
  assoc_dic[str(chrom)] = [lo_val, hi_val]
976
973
 
977
974
  min_dic_val = min(assoc_dic.values())
pycmplot/liftover.py CHANGED
@@ -30,6 +30,7 @@ import numpy as np
30
30
  import pandas as pd
31
31
 
32
32
  from pycmplot.resources import ResourceConfig, default_resources
33
+ from pycmplot.constants import hg38_chr_lengths
33
34
 
34
35
  logger = logging.getLogger(__name__)
35
36
 
@@ -136,6 +137,7 @@ def liftover_hg19_to_hg38(
136
137
 
137
138
  def liftover_position(
138
139
  df: pd.DataFrame,
140
+ hg38_chr_lengths = hg38_chr_lengths,
139
141
  resources: Optional[ResourceConfig] = None,
140
142
  ) -> pd.DataFrame:
141
143
  LIFTOVER_POSITION = """Liftover all hg19 rows in *df* from hg19 to hg38 coordinates.
@@ -190,6 +192,8 @@ def liftover_position(
190
192
  True
191
193
  """
192
194
 
195
+ hg38_chr_lengths = {k.replace("chr",""): v for k, v in hg38_chr_lengths.items()}
196
+
193
197
  if resources is None:
194
198
  resources = default_resources
195
199
 
@@ -208,4 +212,10 @@ def liftover_position(
208
212
  df["BUILD"] = "hg38"
209
213
  df["POS"] = new_positions
210
214
  df["POS"] = df["POS"].fillna(0).astype(int)
211
- return df[df["POS"] != 0]
215
+ clean_df = []
216
+ for chrom in df["CHR"].unique():
217
+ chr_df = df[df["CHR"] == chrom]
218
+ chr_df = chr_df[chr_df["POS"] <= hg38_chr_lengths[chrom]]
219
+ clean_df = pd.concat([pd.DataFrame(clean_df), pd.DataFrame(chr_df)], axis = 0, ignore_index=True)
220
+
221
+ return clean_df[clean_df["POS"] != 0]
@@ -1,10 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
7
7
  License-Expression: CC-BY-NC-SA-4.0
8
+ Project-URL: Homepage, https://github.com/esohkevin/pycmplot
9
+ Project-URL: Issues, https://github.com/esohkevin/pycmplot/issues
10
+ Project-URL: Docs, https://pycmplot.readthedocs.io/en/latest/
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
8
13
  Requires-Python: >=3.9
9
14
  Description-Content-Type: text/markdown
10
15
  License-File: LICENSE
@@ -101,6 +106,9 @@ pip install pycmplot
101
106
  ```bash
102
107
  git clone https://github.com/esohkevin/pycmplot.git
103
108
 
109
+ # or, to get the most recent updates, clone the development branch:
110
+ # git clone -b dev https://github.com/esohkevin/pycmplot.git
111
+
104
112
  cd pycmplot
105
113
 
106
114
  pip install -e .
@@ -207,26 +215,12 @@ A demonstration of how to use the python API is provided in this notebook: https
207
215
 
208
216
  ---
209
217
 
210
- ## Package structure
218
+ ## Contributing
219
+
220
+ To contribute, see the contributing guide: https://github.com/esohkevin/pycmplot?tab=contributing-ov-file
221
+
222
+
223
+ ## Contributors
224
+
225
+ - [Kevin Esoh](https://github.com/esohkevin)
211
226
 
212
- ```
213
- pycmplot/
214
- ├── pyproject.toml
215
- ├── setup.py
216
- ├── setup.cfg
217
- ├── README.md
218
- └── pycmplot/
219
- ├── __init__.py # public API exports
220
- ├── __main__.py # python -m pycmplot
221
- ├── _core.py # main() orchestration
222
- ├── cli.py # argparse definitions
223
- ├── constants.py # chromosome lengths, biotype weights
224
- ├── resources.py # external resource path config
225
- ├── io.py # sumstat loading, delimiter detection
226
- ├── stats.py # get_lead_snps, get_highlight_snps
227
- ├── liftover.py # lazy hg19→hg38 liftover
228
- ├── annotation.py # nearest-gene annotation, hits table
229
- └── plotting/
230
- ├── __init__.py
231
- ├── linear.py # plot_linear
232
- └── circular.py # plot_circular, compute_track_radii_dict
@@ -1,10 +1,10 @@
1
- pycmplot/__init__.py,sha256=XH0PANwq6BAlDe7qjz-77DsRzXk5qhmgMIyNd_83TaE,1288
1
+ pycmplot/__init__.py,sha256=c87ZKZ-URgiW_M_LIlZF6jy1wkZ5fGByyO-o0kksTbM,1288
2
2
  pycmplot/_core.py,sha256=sA1kjavqEX9cLn98d8RU4YJSANCDtMbxbrgGFd8Trew,13046
3
3
  pycmplot/annotation.py,sha256=gMgDfnHmMYpkLuQIaJNHmtXVHHIeSUAcviLrisF2vmY,20886
4
- pycmplot/cli.py,sha256=k6uBR5V0Y89VSzq-GHZxfR0XmBGS6aEUt66fqYEmQ40,19430
4
+ pycmplot/cli.py,sha256=1sb7Gx2qScFyLOe923NNTDeNVnl0c6DM-wxLIqKmmwE,19457
5
5
  pycmplot/constants.py,sha256=XaT3pTWM3dkawU1cA0HFpaNnUupSjv28wpPgmnVEjL0,3431
6
- pycmplot/io.py,sha256=mODcBGxOOH1s7hnJg9FdUgGAEdkM7b1tX5C-Z3ZWFhw,35905
7
- pycmplot/liftover.py,sha256=ZawfO9ZKZADFwyXZBnbrovh4TnV-ja1qHHnIgtxSCBM,6942
6
+ pycmplot/io.py,sha256=drQ_3rnl8ZjC3HfYJd1FMZKPJ02vz8usy6bW8L2dFIg,35585
7
+ pycmplot/liftover.py,sha256=zGuzpcEChFz-EEY7gHHVFAkc5VhV2uLCa_qxSWavoxE,7397
8
8
  pycmplot/resources.py,sha256=r0zHy_-9wu98lkqKENYrptX54uO6np_x94_ju3v2KYE,6414
9
9
  pycmplot/stats.py,sha256=8TXHxfGc4sUr3rE3cHnS2mXfIS1PPj0YgDk1C-z2Pqk,5813
10
10
  pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz,sha256=kLldtgT5-k4ZzU5jN--woFZEuOaWe9pQ4g4hhB3sdQI,840666
@@ -13,10 +13,10 @@ pycmplot/data/hg19ToHg38.over.chain,sha256=oHPYkUIztVQtKXYauOxLOBUFKxOWSRnBKh77L
13
13
  pycmplot/plotting/circular.py,sha256=Xa_2GDW7lLKb8PeN_IL3_hvfkQaqcbQ68Q5QrxjCKs4,27295
14
14
  pycmplot/plotting/linear.py,sha256=lrioORKyOaz34UAOhBWg3W2kLq6nBT7HNkPxy8hX340,37416
15
15
  pycmplot/plotting/qq.py,sha256=ylPJj9gMlDqTOR7JWnQ7wCfvEBUczqEUVMsFiXXEt3s,20883
16
- pycmplot-0.2.3.dist-info/licenses/LICENSE,sha256=7HtJWU-I9Tayt7xnvHU0D6oVqeTp3hMqCbTxbkYBTZQ,20904
16
+ pycmplot-0.2.4.dist-info/licenses/LICENSE,sha256=7HtJWU-I9Tayt7xnvHU0D6oVqeTp3hMqCbTxbkYBTZQ,20904
17
17
  pycmplot_docs/docs/conf.py,sha256=gUt_OitflxpaOrIjeP2aYJ_LCWqTRRdmo_HIcVVf3hI,2992
18
- pycmplot-0.2.3.dist-info/METADATA,sha256=iAIL_nG9fv9JYJ9KA5n7jtqUzr8Cpe-XADv4oy1Asls,8108
19
- pycmplot-0.2.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
20
- pycmplot-0.2.3.dist-info/entry_points.txt,sha256=cE8IAltA_Q-QQuWQ5DE3Lv-9ktYQ_jyWaD6I97QbeyU,49
21
- pycmplot-0.2.3.dist-info/top_level.txt,sha256=gxbPirasq6TczoykxC2gfk5_En7R65BN-J5ADiV5i3c,23
22
- pycmplot-0.2.3.dist-info/RECORD,,
18
+ pycmplot-0.2.4.dist-info/METADATA,sha256=KHXVq9Q-SQ7mV7WzQAp5e4XaYyNAyqYsJDNKcPeDOpc,7683
19
+ pycmplot-0.2.4.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
20
+ pycmplot-0.2.4.dist-info/entry_points.txt,sha256=cE8IAltA_Q-QQuWQ5DE3Lv-9ktYQ_jyWaD6I97QbeyU,49
21
+ pycmplot-0.2.4.dist-info/top_level.txt,sha256=gxbPirasq6TczoykxC2gfk5_En7R65BN-J5ADiV5i3c,23
22
+ pycmplot-0.2.4.dist-info/RECORD,,