PyPI - pycmplot - Versions diffs - 0.2.1__tar.gz → 0.2.2__tar.gz - Mend

pycmplot 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{pycmplot-0.2.1/pycmplot.egg-info → pycmplot-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycmplot
-Version: 0.2.1
+Version: 0.2.2
 Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
 Author: Kevin Esoh
 Author-email: Kevin Esoh <kesohku1@jh.edu>
@@ -183,7 +183,8 @@ pycmplot \
 | `-b, --build` | Comma-separated genome builds of sumstats  | off |
 | `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
 | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
-| `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
+| `-qq, --qq_plot` | Also generate a QQ-plot | off |
+| `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
 | `--logp` | Plot -log10(p) | off |
 | `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
 | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |

{pycmplot-0.2.1 → pycmplot-0.2.2}/README.md RENAMED Viewed

@@ -157,7 +157,8 @@ pycmplot \
 | `-b, --build` | Comma-separated genome builds of sumstats  | off |
 | `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
 | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
-| `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
+| `-qq, --qq_plot` | Also generate a QQ-plot | off |
+| `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
 | `--logp` | Plot -log10(p) | off |
 | `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
 | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |

{pycmplot-0.2.1 → pycmplot-0.2.2}/docs/conf.py RENAMED Viewed

@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.abspath(".."))
 project = "pycmplot"
 copyright = "2026, Kevin Esoh"
 author = "Kevin Esoh"
-release = "0.2.1"  # update to match PyPI version
+release = "0.2.2"  # update to match PyPI version
 # -- General configuration -----------------------------------------------------
 extensions = [

{pycmplot-0.2.1 → pycmplot-0.2.2}/pycmplot/_core.py RENAMED Viewed

@@ -92,6 +92,7 @@ def main() -> None:
     )
     from pycmplot.plotting.linear import plot_linear
     from pycmplot.plotting.circular import plot_circular
+    from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
     from pycmplot.resources import ResourceConfig
     # ------------------------------------------------------------------
@@ -110,6 +111,13 @@ def main() -> None:
     labels_raw       = args.labels
     pcol_arg         = args.pval_column
     logp             = args.logp
+    qq               = args.qq_plot
+    qq_separate      = args.qq_separate
+    qq_ncols         = args.qq_ncols
+    qq_thin          = args.qq_thin
+    thin_below       = args.thin_below
+    qq_max_points    = args.qq_max_points
+    qq_overlay       = args.qq_overlay
     chrom_label_size = args.chrom_label_size
     chrom_label_side = args.chrom_label_side
     track_label_size = args.track_label_size
@@ -164,7 +172,8 @@ def main() -> None:
     # ------------------------------------------------------------------
     (
         plt_name,
-        table_out
+        table_out,
+        plt_base,
     ) = get_output_paths(
         labels,
         mode = mode,
@@ -202,6 +211,7 @@ def main() -> None:
         sumstats_loaded,
         hits_table,
         signif_lines,
+        pval_dict,
     ) = get_sumstats_and_merged_sector_list(
         sum_stats=sum_stats,
         labels=labels,
@@ -296,6 +306,52 @@ def main() -> None:
             figsize=(15, 9)
         )
+    # ------------------------------------------------------------------
+    # QQ PLOT
+    # ------------------------------------------------------------------
+    if qq and sumstats_loaded:
+        logger.info("Generating QQ Plot(s) ...")
+        qq_stem = f"{plt_base}_qq"
+        if qq_separate:
+            plot_qq_separate(
+                pval_dict=pval_dict,
+                thin=qq_thin,
+                thin_below=thin_below,
+                max_points=qq_max_points,
+                output_path=qq_stem,
+                colors=colors,
+                signif_threshold=signif_threshold or 5e-8,
+                dpi=dpi,
+                fig_format=output_format,
+            )
+        elif qq_overlay:
+            plot_qq_overlay(
+                pval_dict=pval_dict,
+                thin=qq_thin,
+                thin_below=thin_below,
+                max_points=qq_max_points,
+                colors=colors,
+                signif_threshold=signif_threshold or 5e-8,
+                dpi=dpi,
+                title=plot_title,
+                output_path=f"{qq_stem}_overlay",
+                fig_format=output_format,
+            )
+        else:
+            plot_qq_combined(
+                pval_dict=pval_dict,
+                thin=qq_thin,
+                thin_below=thin_below,
+                max_points=qq_max_points,
+                colors=colors,
+                ncols=qq_ncols,
+                signif_threshold=signif_threshold or 5e-8,
+                dpi=dpi,
+                title=plot_title,
+                output_path=f"{qq_stem}_combined",
+                fig_format=output_format,
+            )
 if __name__ == "__main__":
     main()

{pycmplot-0.2.1 → pycmplot-0.2.2}/pycmplot/cli.py RENAMED Viewed

@@ -329,29 +329,50 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
     )
     opt.add_argument(
         "-bc",   "--build_column",  required=False, type=str, metavar="str",
-        help="Name of column containing genome build (hg18/hg19/hg38). Or use ``--build`` below to supply genome builds per summary stat file."
-    )
+                     help=("Name of column containing genome build (hg18/hg19/hg38)."
+                         "Or use ``--build`` below to supply genome builds per summary stat file."
+                    ))
     opt.add_argument(
-        "-b","--build",
-        help="""
-        Comma-sperated list of genome build of summary stats file(s) listed in the same order as sumstats files.
-        (e.g. hg19,hg38,hg38,hg19 means:
-            file1.txt.gz --> hg19
-            file2.txt.gz --> hg38
-            file3.tsv --> hg38 ... etc)
-        """,
-        required=False,
-        type=str,
-        metavar='str'
+        "-b","--build", required=False, type=str, metavar='str',
+        help=
+        """Comma-sperated list of genome build of summary stats file(s) listed
+        in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
+        file1.txt.gz --> hg19
+        file2.txt.gz --> hg38
+        file3.tsv --> hg38 ... etc
+        """
     )
     opt.add_argument(
         "--logp", action="store_true",
         help="Plot −log₁₀(p) instead of raw p-values."
     )
-    opt.add_argument(
-        "-qq", "--qq_plot", action="store_true",
-        help="Also generate a QQ-plot."
-    )
+    opt.add_argument("-qq", "--qq_plot", action="store_true",
+                     help="Generate QQ-plot(s) alongside the Manhattan plot.")
+    opt.add_argument("-qq_sep", "--qq_separate", action="store_true",
+                     help=(
+                         "Save one QQ-plot file per sumstat instead of a "
+                         "combined multi-panel figure. Only used when -qq is set."
+                     ))
+    opt.add_argument("-qq_cols", "--qq_ncols", default=3, type=int, metavar="int",
+                     help="Number of columns in the combined QQ-plot grid (default: 3).")
+    opt.add_argument("-qq_thin", "--qq_thin", action="store_true", default=False,
+                     help=(
+                         "Thin null-like p-values before QQ plotting for speed (default: off)."
+                         "Include this flag to turn on for speed."
+                    ))
+    opt.add_argument("-thin_below", "--thin_below", type=float, metavar="float", default=0.01,
+                     help=(
+                         "P-value threshold below which all points are always kept."
+                         "Points above this threshold are downsampled (default: 0.01)."
+                     ))
+    opt.add_argument("-qq_max_pts", "--qq_max_points", default=50000, type=int, metavar="int",
+                     help="Max points to plot per QQ track after thinning (default: 50000).")
+    opt.add_argument("-qq_ov", "--qq_overlay", action="store_true",
+                     help=(
+                         "Plot all sumstats on a single overlaid QQ-plot, "
+                         "each coloured by label with lambda in the legend. "
+                         "Only used when -qq is set."
+                     ))
     opt.add_argument(
         "-tp", "--trim_pval", type=float, metavar="float",
         help="Trim variants with p > this value before plotting."

{pycmplot-0.2.1 → pycmplot-0.2.2}/pycmplot/io.py RENAMED Viewed

@@ -291,7 +291,9 @@ def strip_comma_separated_input_streams(
     if builds:
         builds  = [s.strip() for s in builds.strip().split(",")]
-        if len(sum_stats) != len(labels) != len(builds):
+        if len(sum_stats) == len(labels) == len(builds):
+            pass
+        else:
             sys.exit(
                 "Error: number of summary stats files, labels, and builds must match.\n"
                 f"  Files:  {sum_stats}\n"
@@ -429,16 +431,16 @@ def get_output_paths(
     labels = [re.sub(r"[^a-zA-Z0-9\s]", "", x).replace(" ", "_") for x in labels]
-    plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}")
     suffix     = "_logp" if logp else "_pval"
-    plt_name   = f"{plt_base}{suffix}.{output_format.lower()}"
+    plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}{suffix}")
+    plt_name   = f"{plt_base}.{output_format.lower()}"
-    table_out  = f"{plt_base}{suffix}_locus_summary_table.tsv"
+    table_out  = f"{plt_base}_locus_summary_table.tsv"
-    return plt_name, table_out
+    return plt_name, table_out, plt_base
@@ -561,11 +563,13 @@ def prep_pycmplot_input_info(
     snp_candidates = [c for c in snp_candidates if c]
     pvl_candidates = [c for c in pvl_candidates if c]
-    bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
-    bld_candidates_l = [x.lower() for x in bld_candidates]
-    bld_candidates_u = [x.upper() for x in bld_candidates]
-    bld_candidates = [build] + bld_candidates + bld_candidates_l + bld_candidates_u
-    bld_candidates = [c for c in bld_candidates if c]
+    bld_candidates = []
+    if buildc:
+        bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
+        bld_candidates_l = [x.lower() for x in bld_candidates]
+        bld_candidates_u = [x.upper() for x in bld_candidates]
+        bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
+        bld_candidates = [c for c in bld_candidates if c]
     # ------------------------------------------------------------------
     # Resolve column names per file
@@ -795,6 +799,7 @@ def get_sumstats_and_merged_sector_list(
     }
     sumstats_loaded: dict[str, list] = {}
+    pval_dict: dict[str, np.ndarray | pd.Series] = {}
     all_lead_snps: list[pd.DataFrame] = []
     for label in sumstats.keys() & (file_info or {}).keys():
@@ -818,6 +823,11 @@ def get_sumstats_and_merged_sector_list(
             dtype=sumstat_dtypes,
         ).rename(columns=sumstat_newcols)
+        # Get dict of p-values for qq-plotting before applying trim_pval
+        logger.info("Extracting raw p-values for qq-plotting ...")
+        pval_dict[label] = df["P"].dropna().astype(float).values
         # Add build column if not exist and build supplied
         if build:
             df['BUILD'] = build
@@ -917,7 +927,13 @@ def get_sumstats_and_merged_sector_list(
         for _ in sumstats
     ]
-    # Optionally sort tracks
+    # sort dicts by user-supplied order
+    sumstats_loaded = {key: sumstats_loaded[key] for key in labels if key in sumstats_loaded}
+    pval_dict = {key: pval_dict[key] for key in labels if key in pval_dict}
+    # or sort by user option
     if sort_tracks is not None:
         if sort_tracks.lower() == "label":
             sumstats_loaded = dict(sorted(sumstats_loaded.items()))
@@ -928,6 +944,7 @@ def get_sumstats_and_merged_sector_list(
                     key=lambda item: (item[0], natsort.natsort_keygen()(item[1][1])),
                 )
             )
     # Compute per-sumstat sector sizes (chrom → [min_pos, max_pos])
     assoc_sector_sizes_list: list[dict] = []
@@ -941,7 +958,7 @@ def get_sumstats_and_merged_sector_list(
         for chrom in assoc["CHR"].unique():
             sub = assoc[assoc["CHR"] == chrom]
             lo_val = max(sub["POS"].min() - 1_000_000, 0)
-            hi_val = sub["POS"].max() + 1_000_000
+            hi_val = sub["POS"].max()
             assoc_dic[str(chrom)] = [lo_val, hi_val]
         min_dic_val = min(assoc_dic.values())
@@ -955,9 +972,9 @@ def get_sumstats_and_merged_sector_list(
     # Add spacer sector for y-axis labelling
     if min_dic_val is not None:
-        if len(labels) <= 5:
-            merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
-        else:
-            merged["Spacer1"] = [x * 2 for x in min_dic_val]
+        #if len(labels) <= 5:
+        #    merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
+        #else:
+        merged["Spacer1"] = [x * 2 for x in min_dic_val]
-    return merged, sumstats_loaded, hits_table, signif_lines
+    return merged, sumstats_loaded, hits_table, signif_lines, pval_dict

{pycmplot-0.2.1 → pycmplot-0.2.2}/pycmplot/plotting/circular.py RENAMED Viewed

@@ -380,6 +380,7 @@ def plot_circular(
     annotate: str = None,
     label_col: str = None,
     chrom_label_side: str = 'inside',
+    chrom_label_size: float = 6,
     signif_line: float = 5e-8,
     highlight: bool = False,
     highlight_thresh: float = 5e-8,
@@ -387,7 +388,6 @@ def plot_circular(
     highlight_line: bool = False,
     highlight_line_color: str = 'grey',
     colors: list[str] = ['steelblue', 'grey'],
-    chrom_label_size: float = 6,
     track_label_size: float = 6,
     track_label_orientation: str = 'vertical',
     hits_table: pd.DataFrame = None,
@@ -526,7 +526,8 @@ def plot_circular(
     labels = list(sumstats_loaded.keys())
     (
         plt_name,
-        table_out
+        table_out,
+        plt_base,
     ) = get_output_paths(
         labels,
         mode='cm',
@@ -558,14 +559,16 @@ def plot_circular(
     radii_reversed = dict(reversed(list(radii.items())))
     inside_loc  = r_min - 3
-    outside_loc = 105
-    chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
+    outside_loc = r_max + 4
     if annotate:
         annot_key = next(iter(radii_reversed))
         annot_r   = radii_reversed.pop(annot_key)
+        outside_loc = max(list(radii_reversed.values())[0]) + 2
         radii_reversed["annot_track_r"] = annot_r
+    chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
     for index, (sector_radius, sumstats_key, sumstats_value, signif_dict) in enumerate(
         zip(
             radii_reversed.values(),
@@ -647,7 +650,7 @@ def plot_circular(
                         x=pos,
                         label=str(label),
                         min_r=r_low,
-                        max_r=r_low + 3,
+                        max_r=r_low + 6,
                         label_size=annotation_size,
                         text_kws={
                             "size": "large",
@@ -668,8 +671,9 @@ def plot_circular(
                             r=[sector_min_r, r_low],
                             start=pos,
                             end=pos,
+                            alpha=0.4,
                             color=highlight_line_color,
-                            lw=0.5,
+                            lw=0.4,
                             ls="--",
                         )
@@ -709,6 +713,6 @@ def plot_circular(
     if plt_name:
         fig.savefig(fname=plt_name.lower(), dpi=dpi)
-        logger.info("Saved circular Manhattan plot: %s", plt_name)
+        logger.info("Saved circular Manhattan plot: %s", plt_name.lower())
     return fig

{pycmplot-0.2.1 → pycmplot-0.2.2}/pycmplot/plotting/linear.py RENAMED Viewed

@@ -100,7 +100,7 @@ def _cluster_annotations_by_chr(
     return clusters
-def _draw_annotation_arrows_2(
+def _draw_annotation_arrows(
     ax,
     annot_df,
     chr_col: str,
@@ -212,7 +212,7 @@ def _draw_annotation_arrows_2(
-def _draw_annotation_arrows(
+def _draw_annotation_arrows_2(
     ax,
     annot_df,
     chr_col: str,
@@ -361,6 +361,118 @@ def _draw_annotation_arrows(
         last_xtext = max(x_texts)
+# Using cumulative distance for anntations and separating clusters
+def _draw_annotation_arrows_3(
+    ax,
+    annot_df,
+    chr_col: str,
+    label_col: str,
+    offsets: dict,
+    chr_max: dict,
+    spread_width: float = 60e6,
+    isolation_threshold: float = 80e6,
+    stack_threshold: float = 10e6,
+    y_text_base: float = 0.55,
+    y_stack_step: float = 0.02,
+    max_rad: float = 0.35,
+    y_tip: float = 0.0,
+) -> None:
+    annot_df = annot_df.sort_values(by=[chr_col, "x"], key=natsort_keygen())
+    last_xtext = 0 - spread_width
+    for chr_name, df_chr in annot_df.groupby(chr_col, sort=False):
+        df_chr    = df_chr.sort_values("x")
+        chr_start = offsets[chr_name]
+        chr_end   = offsets[chr_name] + chr_max[chr_name]
+        chr_range = chr_end - chr_start
+        x_signals = df_chr["x"].values
+        labels    = df_chr[label_col].values
+        n         = len(x_signals)
+        # ------------------------------------------------------------------
+        # Compute label x positions (spread or straight)
+        # ------------------------------------------------------------------
+        x_texts = []
+        for k, x_sig in enumerate(x_signals):
+            neighbours = np.delete(x_signals, k)
+            min_dist   = np.min(np.abs(neighbours - x_sig)) if len(neighbours) else np.inf
+            if min_dist >= isolation_threshold:
+                x_texts.append(x_sig)          # Tier 1: sit directly above
+            else:
+                x_texts.append(None)            # Tier 2: needs spreading
+        spread_indices = [k for k, v in enumerate(x_texts) if v is None]
+        if spread_indices:
+            sw  = spread_width
+            pad = sw / int(str(sw)[:2]) / 2
+            while sw > chr_range and sw > pad:
+                sw -= pad
+            sig_start = x_signals[spread_indices[0]]
+            xmin      = sig_start - sw
+            positions = np.arange(xmin, xmin + len(spread_indices) * sw, sw)
+            while positions[0] <= last_xtext:
+                positions = positions + sw
+            for j, k in enumerate(spread_indices):
+                x_texts[k] = positions[j]
+        # ------------------------------------------------------------------
+        # Compute label y positions using cumulative x distance
+        # ------------------------------------------------------------------
+        y_texts = [y_text_base] * n
+        for k in range(1, n):
+            cum_dist = abs(x_texts[k] - x_texts[k - 1])
+            if cum_dist <= stack_threshold:
+                # too close to previous label — stack upward adaptively
+                y_texts[k] = y_texts[k - 1] + y_stack_step + (
+                    y_stack_step * (1 - cum_dist / stack_threshold)
+                )
+            else:
+                y_texts[k] = y_text_base     # far enough — reset to baseline
+        # ------------------------------------------------------------------
+        # Draw arrows and labels
+        # ------------------------------------------------------------------
+        for x_sig, x_txt, y_txt, label in zip(x_signals, x_texts, y_texts, labels):
+            dx  = x_txt - x_sig
+            rad = np.clip(dx / (spread_width * 2), -max_rad, max_rad)
+            arrow = FancyArrowPatch(
+                (x_txt, y_txt),
+                (x_sig, y_tip - 0.05),
+                arrowstyle="-|>",
+                mutation_scale=12,
+                lw=0.6,
+                color="grey",
+                alpha=0.5,
+                connectionstyle=f"arc3,rad={rad}",
+                transform=ax.transData,
+            )
+            ax.add_patch(arrow)
+            ax.text(
+                x_txt,
+                y_txt + 0.02,
+                str(label),
+                rotation=45,
+                ha="left",
+                va="bottom",
+                fontsize=10,
+                clip_on=False,
+                color="black",
+                fontstyle="italic",
+                fontweight="regular",
+            )
+        last_xtext = max(x_texts)
 # ---------------------------------------------------------------------------
 # Public function
 # ---------------------------------------------------------------------------
@@ -647,7 +759,8 @@ def plot_linearm(
     # Annotation track
     # ------------------------------------------------------------------
     if annotate and annot_df is not None:
-        """
         _draw_annotation_arrows(
             ax_annot,
             annot_df,
@@ -657,9 +770,10 @@ def plot_linearm(
             chr_max=chr_max,
             spread_width=60e6,
         )
-        """
-        _draw_annotation_arrows(
+        """
+        _draw_annotation_arrows_2(
             ax=ax_annot,
             annot_df=annot_df,
             chr_col=chr_col,
@@ -667,13 +781,31 @@ def plot_linearm(
             offsets=offsets,
             chr_max=chr_max,
             spread_width=60e6,
-            isolation_threshold=80e6,   # above this → straight (Tier 1)
+            isolation_threshold=40e6,   # above this → straight (Tier 1)
             stack_threshold=10e6,       # below this → stack (Tier 3)
             max_tilt=45,                # max angleA departure from vertical
             y_tip=0.0,
             y_text=0.55,
             y_stack_step=0.12,          # vertical gap between stacked labels
         )
+        _draw_annotation_arrows_3(
+            ax=ax_annot,
+            annot_df=annot_df,
+            chr_col=chr_col,
+            label_col=label_col,
+            offsets=offsets,
+            chr_max=chr_max,
+            spread_width=60e6,
+            isolation_threshold=80e6,
+            stack_threshold=90e6,
+            y_text_base=0.55,
+            y_stack_step=0.03,
+            max_rad=0.35,
+            y_tip=0.0,
+        )
+        """
         ax_annot.set_ylim(0, 1)
         ax_annot.axis("off")
@@ -859,12 +991,11 @@ def plot_linear(
             logger.info("'SNP' column is used for annotation since '%s' column could not be resolved in hits table.", label_col)
             pass
-        logger.info(f"LABEL COL: {label}")
     # plot name
     (
         plt_name,
-        table_out
+        table_out,
+        plt_base,
     ) = get_output_paths(
         labels = t_labels,
         mode='lm',

pycmplot 0.2.1__tar.gz → 0.2.2__tar.gz

pycmplot 0.2.1__tar.gz → 0.2.2__tar.gz