pycmplot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycmplot/io.py ADDED
@@ -0,0 +1,342 @@
1
+ """
2
+ pycmplot.io
3
+ ===========
4
+ Summary statistics loading, delimiter detection, and sector-size computation.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import csv
10
+ import gzip
11
+ import logging
12
+ from collections import defaultdict
13
+ from pathlib import Path
14
+ from typing import Optional
15
+
16
+ import natsort
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from pycmplot.stats import get_lead_snps, get_highlight_snps
21
+ from pycmplot.annotation import get_hits_summary_table
22
+ from pycmplot.resources import ResourceConfig, default_resources
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # File utilities
29
+ # ---------------------------------------------------------------------------
30
+
31
def smart_open(file_path: str):
    """Return a text-mode handle for *file_path*, decompressing ``.gz`` files.

    The choice is made purely on the final suffix of the path: a trailing
    ``.gz`` selects :func:`gzip.open` in ``"rt"`` mode, anything else the
    built-in :func:`open` in ``"r"`` mode.  Closing the returned handle is the
    caller's responsibility (typically via a ``with`` block).
    """
    is_gzipped = Path(file_path).suffix == ".gz"
    opener = gzip.open if is_gzipped else open
    mode = "rt" if is_gzipped else "r"
    return opener(file_path, mode)
37
+
38
+
39
def resolve_delimiter(delim: str) -> str:
    """Map a human-readable delimiter name to the actual separator character.

    Parameters
    ----------
    delim:
        Either a delimiter name (``space``, ``tab``, ``comma``, ``colon``,
        ``semi-colon`` / ``semicolon``; case and surrounding whitespace are
        ignored) or a single literal separator character such as ``"\\t"``.

    Returns
    -------
    str
        The separator character.

    Raises
    ------
    TypeError
        If *delim* is not a string.
    ValueError
        If *delim* is neither a known name nor a single character.
    """
    if not isinstance(delim, str):
        raise TypeError("Delimiter must be a string.")

    # A bare single character is already a separator.  This must be checked
    # BEFORE stripping: "\t" and " " are whitespace, so strip() would turn
    # them into "" and they would be rejected below.  Returning the character
    # untouched also avoids lower-casing a literal separator.
    if len(delim) == 1:
        return delim

    mapping = {
        "space": " ",
        "tab": "\t",
        "comma": ",",
        "colon": ":",
        "semi-colon": ";",
        "semicolon": ";",
    }
    key = delim.strip().lower()
    if key in mapping:
        return mapping[key]
    if len(key) == 1:
        # A single character padded with whitespace, e.g. " , ".
        return key
    raise ValueError(
        f"Invalid delimiter '{delim}'. "
        "Choose from: space, tab, comma, colon, semi-colon."
    )
61
+
62
+
63
def detect_delimiter(file_path: str, sample_size: int = 5_000):
    """Guess the column separator of *file_path* from its first characters.

    Up to *sample_size* characters are read through ``smart_open`` and handed
    to :class:`csv.Sniffer`.  If the sniffer raises :class:`csv.Error`, the
    same sample is passed to ``_fallback_delimiter`` instead.

    Returns
    -------
    (delimiter_str, dialect_or_None)
        The dialect is the sniffed one, or ``None`` when the fallback
        heuristic had to be used.
    """
    with smart_open(file_path) as handle:
        head = handle.read(sample_size)

    sniffer = csv.Sniffer()
    try:
        sniffed = sniffer.sniff(head)
    except csv.Error:
        # Sniffer gave up; fall back to the frequency heuristic.
        return _fallback_delimiter(head), None
    return sniffed.delimiter, sniffed
78
+
79
+
80
+ def _fallback_delimiter(sample: str) -> str:
81
+ candidates = [",", "\t", " ", ";", "|"]
82
+ counts = {d: sample.count(d) for d in candidates}
83
+ best = max(counts, key=counts.get)
84
+ if counts[best] == 0:
85
+ raise ValueError("Unable to detect delimiter automatically.")
86
+ return best
87
+
88
+
89
def get_file_header(
    file_path: str,
    delim: Optional[str] = None,
    dialect=None,
) -> list[str]:
    """Return the column names from the first line of *file_path*.

    Parameters
    ----------
    file_path:
        Path to a plain or gzip-compressed delimited text file (opened via
        ``smart_open``).
    delim:
        Explicit separator.  Takes precedence over *dialect*.
    dialect:
        A :mod:`csv` dialect, used when *delim* is not given.  When neither
        is given the default :mod:`csv` dialect (comma) applies.

    Returns
    -------
    list[str]
        The header fields, or an empty list if the file is empty or the
        header could not be parsed (a warning is logged in the latter case).
    """
    with smart_open(file_path) as f:
        try:
            if delim and len(delim) == 1:
                # Parse with the requested separator directly.  Parsing with
                # the default comma dialect and re-splitting on *delim*
                # mangles headers that contain commas.
                reader = csv.DictReader(f, delimiter=delim)
                hdr = reader.fieldnames or []
            elif delim:
                # csv only accepts one-character delimiters; split the first
                # line by hand for multi-character separators.
                first_line = f.readline().rstrip("\r\n")
                hdr = first_line.split(delim) if first_line else []
            elif dialect:
                reader = csv.DictReader(f, dialect=dialect)
                hdr = reader.fieldnames or []
            else:
                reader = csv.DictReader(f)
                hdr = reader.fieldnames or []
        except csv.Error:
            logger.warning("Header could not be determined for %s", file_path)
            hdr = []
    return list(hdr)
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Sector-size helpers
114
+ # ---------------------------------------------------------------------------
115
+
116
+ def _merge_min_max_lists(dicts: list[dict]) -> dict:
117
+ """Merge per-chromosome [min, max] lists across multiple sumstats."""
118
+ temp: dict = defaultdict(list)
119
+ for d in dicts:
120
+ for key, values in d.items():
121
+ temp[key].extend(values)
122
+ return {k: [min(v), max(v)] for k, v in temp.items()}
123
+
124
+
125
+ # ---------------------------------------------------------------------------
126
+ # Main loader
127
+ # ---------------------------------------------------------------------------
128
+
129
def get_sumstats_and_merged_sector_list(
    sum_stats: list[str],
    labels: list[str],
    logp: bool = False,
    trim_pval: Optional[float] = None,
    file_info: Optional[dict] = None,
    sort_tracks: Optional[str] = "chrom_len",
    table_out: Optional[str] = None,
    highlight: bool = False,
    highlight_thresh: float = 5e-8,
    signif_threshold: Optional[float] = None,
    signif_line: Optional[float] = None,
    suggest_threshold: Optional[float] = None,
    resources: Optional[ResourceConfig] = None,
):
    """Load summary statistics and compute merged Circos sector sizes.

    Parameters
    ----------
    sum_stats:
        List of file paths to GWAS summary statistics (possibly gzip-compressed).
    labels:
        Track labels in the same order as *sum_stats*.
    logp:
        If ``True``, add a ``logP`` column (``-log10(P)``) to every table and
        express the returned threshold lines on the ``-log10`` scale.
    trim_pval:
        If truthy, keep only variants with ``P <= trim_pval``; otherwise keep
        variants with ``P <= 1``.
    file_info:
        Dict keyed by label; each value is a list
        ``[col_names, col_dtypes, rename_map, sep]``.  Labels without an
        entry are skipped.
    sort_tracks:
        ``'label'`` — sort tracks alphabetically by label.
        ``'chrom_len'`` — sort by number of chromosomes (default).
        ``None`` — preserve input order.
    table_out:
        Forwarded to ``get_hits_summary_table``.
    highlight:
        Whether to flag loci for highlighting.
    highlight_thresh:
        Threshold forwarded to ``get_highlight_snps`` when *highlight* is set.
    signif_threshold:
        P-value threshold used to select and filter lead SNPs.  When falsy it
        is derived after loading: ``5e-8`` if *trim_pval* is set or nothing
        was loaded, else ``max(0.05 / n, 5e-8)`` with *n* the row count of
        the last loaded table.
    signif_line:
        Explicit y-value for the genome-wide line.  When ``None`` it is
        derived from *signif_threshold*; when *logp* is set and the value is
        below 1 it is converted with ``-log10``.
    suggest_threshold:
        Suggestive threshold; defaults to ``1e-5`` when falsy.
    resources:
        :class:`~pycmplot.resources.ResourceConfig` instance.

    Returns
    -------
    (merged_sector_sizes, sumstats_loaded, hits_table, signif_lines)
        ``merged_sector_sizes`` maps chromosome → ``[min_pos, max_pos]`` (plus
        a ``"Spacer1"`` entry when at least one sector was computed);
        ``sumstats_loaded`` maps label → ``[DataFrame, n_chroms]``;
        ``signif_lines`` holds one ``{"genome", "suggestive"}`` dict per label.
    """
    if resources is None:
        resources = default_resources

    from pycmplot.liftover import liftover_position

    # Build a label → file path mapping
    sumstats: dict[str, list] = {
        name: [path] for name, path in zip(labels, sum_stats)
    }
    file_info = file_info or {}

    sumstats_loaded: dict[str, list] = {}
    all_lead_snps: list[pd.DataFrame] = []

    # Walk the labels in input order.  A set intersection of the two key
    # views has no defined order, which would make the track order arbitrary
    # and break ``sort_tracks=None``.
    for label in sumstats:
        if label not in file_info:
            continue

        sumstat_cols = file_info[label][0]
        sumstat_dtypes = file_info[label][1]
        sumstat_newcols = file_info[label][2]
        sep = file_info[label][3]

        logger.info("Loading %s from %s …", label, sumstats[label][0])
        df = pd.read_csv(
            sumstats[label][0],
            sep=sep,
            header=0,
            usecols=sumstat_cols,
            dtype=sumstat_dtypes,
        ).rename(columns=sumstat_newcols)

        # Trim insignificant variants for faster plotting
        if trim_pval:
            logger.info("Excluding variants with p-value greater than %s ...", trim_pval)
            df = df[df["P"].astype(float) <= float(trim_pval)]
        else:
            df = df[df["P"].astype(float) <= 1]

        if logp:
            logger.info("Adding a 'logP' column ...")
            df["logP"] = -np.log10(df["P"])

        df["LABEL"] = label

        # Normalise chromosome names
        logger.info('Normalizing chromosome names {"23": "X", "24": "Y", "M": "MT", "MTDNA": "MT"} ...')
        df["CHR"] = (
            df["CHR"]
            .str.replace("chr", "", regex=False)
            .dropna()
            .str.upper()
            .replace({"23": "X", "24": "Y", "M": "MT", "MTDNA": "MT"})
        )

        # Number of distinct chromosomes (for track sorting)
        # NOTE(review): the "- 1" is kept from the original; its intent is
        # not evident from this file — confirm.
        n_chroms = len(df["CHR"].unique()) - 1
        sumstats_loaded[label] = [df, n_chroms]

        # Liftover hg19 data if needed
        if "BUILD" in df.columns and "hg19" in df["BUILD"].unique():
            logger.info("Converting hg19 coordinates to hg38 ...")
            sumstats_loaded[label][0] = liftover_position(df, resources=resources)

        # Lead SNPs / highlight SNPs
        if highlight:
            logger.info("Extracting variants to highlight ...")
            sumstats_loaded[label][0], leads = get_highlight_snps(
                df=sumstats_loaded[label][0],
                window=2_000_000,
                highlight_thresh=highlight_thresh,
                logp=True,
            )
        else:
            leads = get_lead_snps(
                df=sumstats_loaded[label][0],
                highlight_thresh=signif_threshold or 5e-8,
                logp=True,
            )

        all_lead_snps.append(leads)

    # Combine lead SNPs and filter to significance threshold
    all_lead_snps_df = (
        pd.concat(all_lead_snps, ignore_index=True).drop_duplicates()
        if all_lead_snps
        else pd.DataFrame()
    )
    if not all_lead_snps_df.empty and signif_threshold:
        all_lead_snps_df = all_lead_snps_df[
            all_lead_snps_df["P"] <= signif_threshold
        ]

    hits_table = (
        get_hits_summary_table(
            leads_df=all_lead_snps_df,
            table_out=table_out,
            window_kb=2_000,
            resources=resources,
        )
        if not all_lead_snps_df.empty
        else pd.DataFrame()
    )

    # Derive significance/suggestive thresholds
    if not signif_threshold:
        if trim_pval:
            signif_threshold = 5e-8
        elif sumstats_loaded:
            last_label = list(sumstats_loaded)[-1]
            n = len(sumstats_loaded[last_label][0]["P"])
            # Guard against an empty table (nothing survived filtering).
            signif_threshold = max(0.05 / n, 5e-8) if n else 5e-8
        else:
            signif_threshold = 5e-8

    if not suggest_threshold:
        suggest_threshold = 1e-5

    suggest_line = suggest_threshold
    if logp:
        suggest_line = -np.log10(suggest_threshold)

    if signif_line is None:
        signif_line = signif_threshold
        if logp:
            signif_line = -np.log10(signif_threshold)
    else:
        # A value below 1 is treated as a raw p-value and converted.
        if logp and signif_line < 1:
            signif_line = -np.log10(signif_line)

    signif_lines = [
        {"genome": signif_line, "suggestive": suggest_line}
        for _ in sumstats
    ]

    # Optionally sort tracks
    if sort_tracks is not None:
        if sort_tracks.lower() == "label":
            sumstats_loaded = dict(sorted(sumstats_loaded.items()))
        else:  # chrom_len
            # NOTE(review): the label is the primary sort key here, so the
            # chromosome count only breaks ties between identical labels
            # (which cannot occur for dict keys).  Behaviour is preserved;
            # confirm the intended ordering before changing it.
            count_key = natsort.natsort_keygen()
            sumstats_loaded = dict(
                sorted(
                    sumstats_loaded.items(),
                    key=lambda item: (item[0], count_key(item[1][1])),
                )
            )

    # Compute per-sumstat sector sizes (chrom → [min_pos, max_pos])
    assoc_sector_sizes_list: list[dict] = []
    min_dic_val = None

    for df, _n in sumstats_loaded.values():
        chrom_col = df["CHR"]
        # Keep chromosomes whose name has at most two characters, and drop
        # rows with a missing chromosome so no "nan" sector is produced.
        assoc = df[chrom_col.notna() & ~(chrom_col.str.len() > 2)].copy()
        assoc["POS"] = assoc["POS"].fillna(0).astype(int)

        assoc_dic: dict[str, list] = {}
        for chrom in assoc["CHR"].unique():
            sub = assoc[assoc["CHR"] == chrom]
            # Pad each sector by 1 Mb on both sides, clamped at 0.
            lo_val = max(sub["POS"].min() - 1_000_000, 0)
            hi_val = sub["POS"].max() + 1_000_000
            assoc_dic[str(chrom)] = [lo_val, hi_val]

        if not assoc_dic:
            # Nothing to contribute; min() over an empty dict would raise.
            continue

        min_dic_val = min(assoc_dic.values())
        assoc_sector_sizes_list.append(assoc_dic)

    merged = _merge_min_max_lists(assoc_sector_sizes_list)
    merged = dict(natsort.natsorted(merged.items(), key=lambda item: item[0]))

    if "23" in merged:
        merged["X"] = merged.pop("23")

    # Add spacer sector for y-axis labelling
    if min_dic_val is not None:
        if len(labels) <= 5:
            merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
        else:
            merged["Spacer1"] = [x * 2 for x in min_dic_val]

    return merged, sumstats_loaded, hits_table, signif_lines
pycmplot/liftover.py ADDED
@@ -0,0 +1,111 @@
1
+ """
2
+ pycmplot.liftover
3
+ =================
4
+ Genome coordinate liftover utilities (hg19 → hg38).
5
+
6
+ The :class:`pyliftover.LiftOver` object is initialised **lazily** — it is
7
+ created only when ``liftover_position`` is first called, so importing this
8
+ module never raises a :class:`FileNotFoundError` even if the chain file has
9
+ not been configured yet.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ from typing import Optional
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from pycmplot.resources import ResourceConfig, default_resources
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Lazy singleton — one LiftOver object per chain file path
26
+ # ---------------------------------------------------------------------------
27
+ _lo_cache: dict[str, object] = {}
28
+
29
+
30
def _get_liftover(chain_path: str):
    """Return the :class:`~pyliftover.LiftOver` cached for *chain_path*.

    On a cache miss ``pyliftover`` is imported, a new ``LiftOver`` is built
    from *chain_path*, stored in the module-level ``_lo_cache`` and returned.
    """
    try:
        return _lo_cache[chain_path]
    except KeyError:
        pass

    from pyliftover import LiftOver  # deferred import

    logger.info("Loading LiftOver chain file: %s", chain_path)
    converter = LiftOver(chain_path)
    _lo_cache[chain_path] = converter
    return converter
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Public helpers
42
+ # ---------------------------------------------------------------------------
43
+
44
def liftover_hg19_to_hg38(
    chrom: str,
    pos: int,
    resources: Optional[ResourceConfig] = None,
) -> Optional[int]:
    """Convert a single hg19 coordinate to hg38.

    Parameters
    ----------
    chrom:
        Chromosome name **without** the ``chr`` prefix (e.g. ``"1"``, ``"X"``);
        the prefix is added before the lookup.
    pos:
        0-based position (as expected by pyliftover).
    resources:
        :class:`~pycmplot.resources.ResourceConfig` instance providing the
        ``"chain_hg19_hg38"`` resource.  Falls back to the module-level
        :data:`~pycmplot.resources.default_resources`.

    Returns
    -------
    int or None
        Position of the first conversion hit, or ``None`` if the conversion
        produced no hits.  The chromosome, strand and score of the hit are
        discarded.
    """
    cfg = default_resources if resources is None else resources
    converter = _get_liftover(cfg.require("chain_hg19_hg38"))

    hits = converter.convert_coordinate(f"chr{chrom}", pos)
    if hits:
        # The first hit is taken as the best one (per the original author,
        # pyliftover orders hits by chain score).
        _new_chrom, new_pos, _strand, _score = hits[0]
        return new_pos
    return None
78
+
79
+
80
def liftover_position(
    df: pd.DataFrame,
    resources: Optional[ResourceConfig] = None,
) -> pd.DataFrame:
    """Return a copy of *df* with hg19 rows lifted to hg38.

    Expects columns ``CHR``, ``POS``, and ``BUILD``.  Rows whose ``BUILD``
    is ``'hg19'`` are converted one by one with ``liftover_hg19_to_hg38``;
    other rows keep their position.  The input frame itself is not modified.

    In the result, ``OLD_POS`` and ``OLD_BUILD`` hold the incoming values,
    ``BUILD`` is set to ``'hg38'`` for every row, and rows whose new position
    is ``None`` or ``0`` are dropped.
    """
    if resources is None:
        resources = default_resources

    lifted = df.copy()
    lifted["POS"] = lifted["POS"].astype(int)

    rows = zip(lifted["CHR"], lifted["POS"], lifted["BUILD"])
    converted: list[Optional[int]] = [
        liftover_hg19_to_hg38(chrom, pos, resources) if build == "hg19" else pos
        for chrom, pos, build in rows
    ]

    lifted["OLD_POS"] = lifted["POS"]
    lifted["OLD_BUILD"] = lifted["BUILD"]
    lifted["BUILD"] = "hg38"
    lifted["POS"] = converted
    # Failed conversions (None) become 0 and are filtered out below.
    lifted["POS"] = lifted["POS"].fillna(0).astype(int)

    succeeded = lifted["POS"] != 0
    return lifted[succeeded]
@@ -0,0 +1,261 @@
1
+ """
2
+ pycmplot.plotting.circular
3
+ ==========================
4
+ Per-chromosome circular (Circos-style) Manhattan track plotter and
5
+ track-radius calculator.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import math
12
+ from typing import Optional
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Track radius calculator
22
+ # ---------------------------------------------------------------------------
23
+
24
def compute_track_radii_dict(
    n_tracks: int,
    r_min: float = 0,
    r_max: float = 100,
    pad: float = 1,
    annotate: bool = False,
) -> dict[str, tuple[float, float]]:
    """Compute (r_start, r_end) tuples for *n_tracks* evenly-spaced tracks.

    Parameters
    ----------
    n_tracks:
        Number of data tracks.
    r_min, r_max:
        Inner and outer radius of the full plotting area.
    pad:
        Spacing between consecutive tracks.
    annotate:
        If ``True``, add one extra track slot for an annotation ring.

    Returns
    -------
    dict
        ``{"track_1": (start, end), "track_2": (start, end), …}`` ordered
        from *r_min* outwards.

    Raises
    ------
    ValueError
        If fewer than one track slot is requested, or if the padding leaves
        no room for the tracks.
    """
    if annotate:
        n_tracks += 1

    # Reject empty layouts explicitly; otherwise zero tracks would fail with
    # a ZeroDivisionError below and negative counts would silently yield {}.
    if n_tracks < 1:
        raise ValueError(f"At least one track is required, got {n_tracks}.")

    total_space = r_max - r_min
    usable_space = total_space - pad * (n_tracks - 1)

    if usable_space <= 0:
        raise ValueError(
            f"Padding ({pad}) is too large for {n_tracks} tracks in "
            f"radius range [{r_min}, {r_max}]."
        )

    track_height = usable_space / n_tracks
    radii: dict[str, tuple[float, float]] = {}
    current = float(r_min)

    for i in range(n_tracks):
        radii[f"track_{i + 1}"] = (current, current + track_height)
        current += track_height + pad

    return radii
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Per-chromosome circular Manhattan track
74
+ # ---------------------------------------------------------------------------
75
+
76
def plot_circular(
    sector=None,
    sector_radius=None,
    annotation_r=None,
    assoc: Optional[pd.DataFrame] = None,
    sector_sizes: Optional[dict] = None,
    chrom_label_loc=None,
    chrom_label_size: float = 6,
    track_label_size: float = 6,
    track_label_orientation: str = "vertical",
    track_index: int = 0,
    assoc_label: Optional[str] = None,
    logp: bool = True,
    signif_line: Optional[float] = None,
    signif_threshold: Optional[float] = None,
    suggest_line: Optional[float] = None,
    suggest_threshold: Optional[float] = None,
    highlight: bool = False,
    highlight_thresh: Optional[float] = None,
    colors: Optional[list[str]] = None,
) -> None:
    """Plot a single chromosome's data onto a pycirclize sector track.

    This function is called once per (sector, sumstat) pair in the main
    circular Manhattan loop.  It mutates *sector* in-place and returns
    ``None``.  Lead-SNP collection is handled in the calling code.

    Parameters
    ----------
    sector:
        A :class:`pycirclize.Sector` object.
    sector_radius:
        ``(r_start, r_end)`` tuple for this track on the sector.
    annotation_r:
        Accepted for interface compatibility; not used in this function.
    assoc:
        Summary statistics DataFrame for **all** chromosomes (filtered to the
        current sector chromosome inside the function).  Must have columns
        ``CHR``, ``POS``, ``P`` (and ``logP`` if *logp* is ``True``), plus a
        boolean ``in_locus`` column when *highlight* is ``True``.
    sector_sizes:
        Ordered dict of ``{chrom: [min_pos, max_pos]}`` for all sectors,
        used to place labels on the first/last sector.
    chrom_label_loc, chrom_label_size:
        Radius and font size of the chromosome label.
    track_label_size, track_label_orientation:
        Font size and orientation of the track label.
    track_index:
        0-based index of the current sumstat track (used for chromosome labels).
    assoc_label:
        Track label text written on the last sector.
    logp:
        Plot the ``logP`` column when ``True``, else ``P``.
    signif_line, suggest_line:
        When truthy, the genome-wide / suggestive line is drawn.  Each also
        serves as the y-value when the matching ``*_threshold`` is ``None``.
    signif_threshold, suggest_threshold:
        y-values at which the respective lines are drawn, when provided.
    highlight:
        Draw rows flagged ``in_locus`` in a separate colour.
    highlight_thresh:
        Accepted for interface compatibility; not used in this function.
    colors:
        Two alternating colours for even/odd chromosomes.
    """
    if colors is None:
        colors = ["steelblue", "silver"]

    logger.info("Processing sector: %s", sector.name)

    assoc = assoc.copy()
    assoc["POS"] = assoc["POS"].fillna(0).astype(int)

    # y-values of the threshold lines.  The lines are gated on *_line below
    # but were always drawn at *_threshold, which defaults to None; fall back
    # to the line value so a call that passes only *_line still works.
    genome_wide_sig = signif_threshold if signif_threshold is not None else signif_line
    suggestive = suggest_threshold if suggest_threshold is not None else suggest_line

    assoc_uniq_chroms = list(assoc["CHR"].unique())

    y_col = "logP" if logp else "P"

    # Axis range from the finite values only: math.floor / math.ceil raise on
    # NaN and inf, and the builtin min / max raise on empty input.
    y_values = assoc[y_col].astype(float)
    y_values = y_values[np.isfinite(y_values)]
    if y_values.empty:
        v_min, v_max = 0.0, 0.0
    else:
        v_min = float(math.floor(y_values.min()))
        v_max = float(math.ceil(y_values.max()))
    if logp:
        # Head-room above the strongest signal.
        v_max += 2

    sector_keys = list(sector_sizes.keys())

    # ------------------------------------------------------------------
    # Track label on the last (spacer) sector
    # ------------------------------------------------------------------
    if sector.name == sector_keys[-1]:
        lbl_track = sector.add_track(sector_radius)
        lbl_track.axis(fc="white", alpha=0)

        lbl_track.text(
            assoc_label,
            x=(sector.end - sector.start) / 6,
            adjust_rotation=True,
            orientation=track_label_orientation,
            size=float(track_label_size),
            color="black",
            fontstyle="normal",
            fontweight="regular",
            multialignment="left",
        )

    # Nothing to plot for sectors this sumstat has no data on.
    if sector.name not in assoc_uniq_chroms:
        return

    # ------------------------------------------------------------------
    # Chromosome label (first track only, or chrX)
    # ------------------------------------------------------------------
    if track_index == 0 or sector.name == "X":
        sector.text(
            sector.name.replace("23", "X"),
            r=chrom_label_loc,
            size=chrom_label_size,
        )

    sector.axis(fc="none", lw=0, ec="none", alpha=0.5)

    # ------------------------------------------------------------------
    # Y-axis ticks on the first chromosome
    # ------------------------------------------------------------------
    if sector.name == sector_keys[0]:
        yax_track = sector.add_track(sector_radius)
        yax_track.axis(fc="white", alpha=0.08)

        if logp:
            # Widen the tick step until between 2 and 5 ticks remain.
            tick_step = 1
            yticks = []
            while len(yticks) < 2 or len(yticks) > 5:
                yticks = np.arange(v_min, v_max, tick_step)
                tick_step += 1
        else:
            yticks = np.arange(v_min, v_max)

        yax_track.yticks(
            yticks,
            labels=[str(int(t)) for t in yticks],
            side="left",
            vmin=v_min,
            vmax=v_max,
            label_size=5,
        )

    # ------------------------------------------------------------------
    # Data track
    # ------------------------------------------------------------------
    assoc_chr = assoc.loc[assoc["CHR"] == sector.name]
    track = sector.add_track(sector_radius, r_pad_ratio=0.05)
    track.axis(fc="lightgrey", alpha=0.08)

    # Numeric index used only for the even/odd colour alternation.  Names
    # that are not numeric after the X/Y substitution (e.g. "MT") made the
    # plain int() conversion raise ValueError.
    named_chroms = {"X": 23, "Y": 24, "MT": 25, "M": 25}
    if sector.name in named_chroms:
        chrom_index = named_chroms[sector.name]
    else:
        chrom_num = sector.name.replace("X", "23").replace("Y", "24")
        try:
            chrom_index = int(chrom_num)
        except ValueError:
            chrom_index = 0
    color = colors[0] if chrom_index % 2 == 0 else colors[1]

    if highlight:
        sig = assoc_chr[assoc_chr["in_locus"]]
        bg = assoc_chr[~assoc_chr["in_locus"]]

        track.scatter(
            data=bg,
            x=list(bg["POS"].astype(float)),
            y=list(bg[y_col].astype(float)),
            vmin=v_min, vmax=v_max,
            marker="o", s=6, color=color, alpha=1,
        )

        if not sig.empty:
            track.scatter(
                sig["POS"].to_numpy(),
                sig[y_col].to_numpy(),
                vmin=v_min, vmax=v_max,
                s=6, marker="o", color="brown",
            )
    else:
        track.scatter(
            data=assoc_chr,
            x=list(assoc_chr["POS"].astype(float)),
            y=list(assoc_chr[y_col].astype(float)),
            vmin=v_min, vmax=v_max,
            marker="o", s=6, color=color, alpha=1,
        )

    # ------------------------------------------------------------------
    # Significance lines
    # ------------------------------------------------------------------
    if signif_line:
        track.line(
            x=[sector.start, sector.end],
            y=[genome_wide_sig, genome_wide_sig],
            vmin=v_min, vmax=v_max,
            color="orangered", linestyle="--",
        )

    if suggest_line:
        track.line(
            x=[sector.start, sector.end],
            y=[suggestive, suggestive],
            vmin=v_min, vmax=v_max,
            color="lightblue", linestyle="--",
        )