ggh4x-python 0.3.1.9000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. ggh4x/__init__.py +140 -0
  2. ggh4x/_aimed_text_grob.py +432 -0
  3. ggh4x/_borrowed_ggplot2.py +273 -0
  4. ggh4x/_cli.py +84 -0
  5. ggh4x/_datasets.py +106 -0
  6. ggh4x/_download.py +111 -0
  7. ggh4x/_facet_helpers.py +313 -0
  8. ggh4x/_facet_utils.py +649 -0
  9. ggh4x/_gap_grobs.py +606 -0
  10. ggh4x/_registry.py +10 -0
  11. ggh4x/_rlang.py +93 -0
  12. ggh4x/_utils.py +150 -0
  13. ggh4x/_vctrs.py +233 -0
  14. ggh4x/conveniences.py +601 -0
  15. ggh4x/coord_axes_inside.py +380 -0
  16. ggh4x/element_part_rect.py +545 -0
  17. ggh4x/facet_grid2.py +1018 -0
  18. ggh4x/facet_manual.py +901 -0
  19. ggh4x/facet_nested.py +776 -0
  20. ggh4x/facet_nested_wrap.py +193 -0
  21. ggh4x/facet_wrap2.py +896 -0
  22. ggh4x/geom_box.py +536 -0
  23. ggh4x/geom_outline_point.py +444 -0
  24. ggh4x/geom_pointpath.py +259 -0
  25. ggh4x/geom_polygonraster.py +252 -0
  26. ggh4x/geom_rectrug.py +489 -0
  27. ggh4x/geom_text_aimed.py +279 -0
  28. ggh4x/guide_stringlegend.py +354 -0
  29. ggh4x/help_secondary.py +549 -0
  30. ggh4x/multiscale/__init__.py +51 -0
  31. ggh4x/multiscale/_multiscale_add.py +207 -0
  32. ggh4x/multiscale/scale_listed.py +167 -0
  33. ggh4x/multiscale/scale_manual.py +478 -0
  34. ggh4x/multiscale/scale_multi.py +393 -0
  35. ggh4x/panel_scales/__init__.py +58 -0
  36. ggh4x/panel_scales/at_panel.py +115 -0
  37. ggh4x/panel_scales/facetted_pos_scales.py +647 -0
  38. ggh4x/panel_scales/force_panelsize.py +411 -0
  39. ggh4x/panel_scales/scale_facet.py +222 -0
  40. ggh4x/position_disjoint_ranges.py +229 -0
  41. ggh4x/position_lineartrans.py +242 -0
  42. ggh4x/py.typed +0 -0
  43. ggh4x/resources/faithful.csv +273 -0
  44. ggh4x/resources/iris.csv +151 -0
  45. ggh4x/resources/mtcars.csv +33 -0
  46. ggh4x/resources/pressure.csv +20 -0
  47. ggh4x/resources/volcano.csv +87 -0
  48. ggh4x/save.py +255 -0
  49. ggh4x/stat_difference.py +388 -0
  50. ggh4x/stat_funxy.py +436 -0
  51. ggh4x/stat_rle.py +290 -0
  52. ggh4x/stat_rollingkernel.py +369 -0
  53. ggh4x/stat_theodensity.py +681 -0
  54. ggh4x/strip_nested.py +448 -0
  55. ggh4x/strip_split.py +687 -0
  56. ggh4x/strip_tag.py +636 -0
  57. ggh4x/strip_themed.py +232 -0
  58. ggh4x/strip_vanilla.py +1464 -0
  59. ggh4x/themes.py +31 -0
  60. ggh4x/themes_ggh4x.py +67 -0
  61. ggh4x_python-0.3.1.9000.dist-info/METADATA +40 -0
  62. ggh4x_python-0.3.1.9000.dist-info/RECORD +64 -0
  63. ggh4x_python-0.3.1.9000.dist-info/WHEEL +4 -0
  64. ggh4x_python-0.3.1.9000.dist-info/licenses/LICENSE +3 -0
@@ -0,0 +1,273 @@
1
+ """ggplot2-internal helpers borrowed by ggh4x (R source: borrowed_ggplot2.R).
2
+
3
+ These are ggplot2 internals that ggh4x copies because they are not exported. Only the ones
4
+ ggh4x actually uses for facet layout / strip assembly are ported here; the rest are sourced
5
+ from ``ggplot2_py`` where available. The crown jewel is ``id``/``id_var`` — the radix-based
6
+ panel-id assignment that defines facet panel ordering. Ported verbatim and verified against R.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, List, Sequence
12
+
13
+ import numpy as np
14
+ import pandas as pd
15
+
16
+ __all__ = [
17
+ "id_var",
18
+ "id",
19
+ "empty",
20
+ "is_zero",
21
+ "snake_class",
22
+ "ulevels",
23
+ "unique_combs",
24
+ ]
25
+
26
+
27
+ def _is_factor(x: Any) -> bool:
28
+ return isinstance(x, pd.Categorical) or (
29
+ isinstance(x, pd.Series) and isinstance(x.dtype, pd.CategoricalDtype)
30
+ )
31
+
32
+
33
def id_var(x: Sequence[Any], drop: bool = False) -> np.ndarray:
    """Assign integer ids to a single variable, mirroring ggplot2's ``id_var``.

    Parameters
    ----------
    x : sequence
        A vector (optionally a pandas Categorical/factor).
    drop : bool
        If ``True``, only values actually present receive an id (unused
        factor levels are skipped).

    Returns
    -------
    np.ndarray
        1-based integer ids as an ``_IdArray``; the number of distinct ids is
        stored on the result as ``result.n``.
    """

    def _tagged(values: np.ndarray, count: int) -> "_IdArray":
        # Wrap raw ids and record the distinct-value count alongside them.
        wrapped = _IdArray(values)
        wrapped.n = count
        return wrapped

    if len(x) == 0:
        return _tagged(np.array([], dtype=int), 0)

    factor_input = _is_factor(x)
    if factor_input:
        cat = x if isinstance(x, pd.Categorical) else pd.Categorical(x)

    if factor_input and not drop:
        # R addNA(x, ifany=TRUE): a missing value becomes one extra trailing
        # level, but only when a missing value is actually present.
        codes = cat.codes.astype(int)
        n_levels = len(list(cat.categories))
        if bool((codes < 0).any()):
            ids = np.where(codes < 0, n_levels + 1, codes + 1)
            return _tagged(ids.astype(int), n_levels + 1)
        return _tagged((codes + 1).astype(int), n_levels)

    # Remaining cases: drop=True (factors included) or a non-factor vector.
    # R: levels <- sort(unique0(x), na.last = TRUE); id <- match(x, levels).
    values = pd.Series(list(x))
    any_missing = bool(values.isna().any())
    if factor_input:
        # A factor keeps its declared level order (not alphabetical order),
        # restricted to the levels that occur in the data.
        used_codes = {int(code) for code in np.asarray(cat.codes) if code >= 0}
        levels = [
            cat.categories[k] for k in range(len(cat.categories)) if k in used_codes
        ]
    else:
        distinct = pd.unique(values.dropna())
        levels = list(np.sort(distinct)) if len(distinct) else []

    position = dict(zip(levels, range(1, len(levels) + 1)))
    # Missing values sort last, so they take the id after the final level.
    na_id = len(levels) + 1 if any_missing else 0
    ids = np.array(
        [na_id if pd.isna(value) else position[value] for value in values],
        dtype=int,
    )
    return _tagged(ids, int(len(levels) + (1 if any_missing else 0)))
92
+
93
+
94
+ class _IdArray(np.ndarray):
95
+ """ndarray carrying an ``n`` attribute (R's ``attr(id, 'n')``)."""
96
+
97
+ n: int
98
+
99
+ def __new__(cls, input_array: np.ndarray) -> "_IdArray":
100
+ obj = np.asarray(input_array, dtype=int).view(cls)
101
+ obj.n = 0
102
+ return obj
103
+
104
+ def __array_finalize__(self, obj: Any) -> None:
105
+ if obj is None:
106
+ return
107
+ self.n = getattr(obj, "n", 0)
108
+
109
+
110
def id(variables: pd.DataFrame | Sequence[Any], drop: bool = False) -> "_IdArray":
    """Compute a unique id per row across several variables, mirroring ggplot2's ``id``.

    Each variable is id-coded with ``id_var``; the codes are then combined by
    radix mixing, ``sum((id_i - 1) * cumprod(n_{<i})) + 1``, with the variables
    reversed so that the first one varies slowest. This is what determines
    facet ``PANEL`` ordering.

    Parameters
    ----------
    variables : pandas.DataFrame or sequence of vectors
        The faceting variables (columns). Zero-length columns are ignored.
    drop : bool
        Drop unused combinations.

    Returns
    -------
    _IdArray
        1-based row ids with ``.n`` = number of distinct combinations.
    """
    if isinstance(variables, pd.DataFrame):
        nrows = len(variables)
        columns = [variables[name] for name in variables.columns]
    else:
        nrows = None
        columns = list(variables)
    columns = [col for col in columns if len(col) > 0]

    if len(columns) == 0:
        # Nothing to combine: every row is its own id (none without a frame).
        count = 0 if nrows is None else nrows
        sequence = _IdArray(np.arange(1, count + 1))
        sequence.n = count
        return sequence

    if len(columns) == 1:
        return id_var(columns[0], drop=drop)

    coded = [id_var(col, drop=drop) for col in columns]
    coded.reverse()  # R's rev(): the first variable ends up with the largest weight
    sizes = np.array([ids.n for ids in coded], dtype=float)
    total = int(np.prod(sizes))
    # Weight of each position: the product of the sizes that precede it.
    weights = np.cumprod(np.concatenate(([1.0], sizes[:-1])))
    matrix = np.column_stack([np.asarray(ids, dtype=float) for ids in coded])
    mixed = ((matrix - 1.0) @ weights + 1.0).astype(int)

    if drop:
        # Re-code so that only the combinations present are numbered.
        return id_var(mixed, drop=True)

    result = _IdArray(mixed)
    result.n = total
    return result
155
+
156
+
157
def empty(df: Any) -> bool:
    """Test whether a data frame is "empty", mirroring ggplot2's ``empty``.

    Parameters
    ----------
    df : Any
        Usually a ``pandas.DataFrame`` or ``None``.

    Returns
    -------
    bool
        ``True`` if *df* is ``None`` or a DataFrame with zero rows or zero
        columns. Any other object is reported as not empty.
    """
    if df is None:
        return True
    if not isinstance(df, pd.DataFrame):
        return False
    n_rows, n_cols = df.shape
    return n_rows == 0 or n_cols == 0
174
+
175
+
176
def is_zero(x: Any) -> bool:
    """Test for a zero/empty grob, mirroring ggplot2's ``is.zero``.

    Parameters
    ----------
    x : Any
        Object to inspect.

    Returns
    -------
    bool
        ``True`` if *x* is ``None``, if its class is named ``ZeroGrob``,
        ``zeroGrob`` or ``NullGrob``, or if its ``_grid_class`` attribute is
        ``"zeroGrob"`` or ``"null"``.
    """
    if x is None:
        return True
    # Match on the class name so no grob class has to be imported here.
    if type(x).__name__ in ("ZeroGrob", "zeroGrob", "NullGrob"):
        return True
    grid_class = getattr(x, "_grid_class", None)
    return grid_class in ("zeroGrob", "null")
195
+
196
+
197
def snake_class(x: Any) -> str:
    """Convert a class name to snake_case, mirroring ggplot2's ``snake_class``.

    Parameters
    ----------
    x : Any
        A class-name string, or any object (its type's name is used).

    Returns
    -------
    str
        e.g. ``FacetGrid2`` -> ``facet_grid2``.
    """
    import re

    class_name = x if isinstance(x, str) else type(x).__name__
    # Break before a capital that is followed by a lowercase letter
    # ("FacetGrid" -> "Facet_Grid").
    split_words = re.sub(r"([A-Za-z])([A-Z])([a-z])", r"\1_\2\3", class_name)
    without_dots = split_words.replace(".", "_")
    # Break any remaining lowercase-to-capital boundary ("StatRLE" -> "Stat_RLE").
    split_camel = re.sub(r"([a-z])([A-Z])", r"\1_\2", without_dots)
    return split_camel.lower()
217
+
218
+
219
def ulevels(x: Sequence[Any]) -> np.ndarray:
    """Unique levels of *x* (NA last when present), mirroring ggplot2's ``ulevels``.

    Parameters
    ----------
    x : sequence
        A vector, optionally a pandas Categorical / categorical Series.

    Returns
    -------
    np.ndarray
        For factors, all declared categories in their declared order; for
        other input, the sorted distinct non-missing values. When *x* contains
        a missing value, ``nan`` is appended and the array has object dtype.
    """
    if not _is_factor(x):
        values = pd.Series(list(x))
        distinct = pd.unique(values.dropna())
        ordered = np.sort(distinct) if len(distinct) else np.array([])
        # R sort(..., na.last = TRUE): keep NA as the last level when present.
        if bool(values.isna().any()):
            return np.asarray(list(ordered) + [np.nan], dtype=object)
        return ordered

    cat = x if isinstance(x, pd.Categorical) else pd.Categorical(x)
    declared = list(cat.categories)
    # R addNA(x, ifany=TRUE): add an <NA> level when an NA value is present.
    # A negative category code marks a missing value.
    if bool((np.asarray(cat.codes) < 0).any()):
        return np.asarray(declared + [np.nan], dtype=object)
    return np.asarray(declared)
244
+
245
+
246
def unique_combs(df: pd.DataFrame) -> pd.DataFrame:
    """All unique combinations of the columns' levels, mirroring ggplot2's ``unique_combs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns supply the levels to cross.

    Returns
    -------
    pandas.DataFrame
        Cross-product of per-column ``ulevels``. Mirrors R
        ``rev(expand.grid(rev(unique_values)))``: the FIRST column varies
        slowest and the last varies fastest, and NA levels are included.
        A frame without columns yields an empty frame.
    """
    if df.shape[1] == 0:
        return pd.DataFrame()

    from itertools import product

    names = list(df.columns)
    per_column = {name: ulevels(df[name]) for name in names}
    # itertools.product varies its first argument slowest, which gives the
    # "first column slowest" ordering of R's rev(expand.grid(rev(...))).
    combos = list(product(*[per_column[name] for name in names]))
    table = {
        name: [combo[position] for combo in combos]
        for position, name in enumerate(names)
    }
    return pd.DataFrame(table)[names]
ggh4x/_cli.py ADDED
@@ -0,0 +1,84 @@
1
+ """cli message shims (R source: cli package usage in ggh4x).
2
+
3
+ ggh4x calls ``cli::cli_abort`` / ``cli::cli_warn`` / ``cli::cli_inform`` for user-facing
4
+ messages. The Python port maps these to standard exceptions / ``warnings`` so failures stay
5
+ loud (per the error-handling discipline) while stripping cli's ``{.arg}`` glue markup.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ import warnings
12
+ from typing import NoReturn, Type
13
+
14
+ __all__ = ["cli_abort", "cli_warn", "cli_inform", "strip_cli_markup"]
15
+
16
# cli inline-markup spans such as {.arg foo}, {.code x}, {.field y}, {.val 3},
# {.cls C}; group 1 captures the span's content.
_CLI_SPAN = re.compile(r"\{\.[a-zA-Z_]+\s+([^{}]*)\}")
# Remaining interpolation braces {x} -> x (R glue cannot be evaluated here, so
# the braces are only unwrapped).
_CLI_BRACE = re.compile(r"\{([^{}]*)\}")


def strip_cli_markup(message: str) -> str:
    """Remove cli inline-markup so a plain message remains.

    Parameters
    ----------
    message : str
        A message possibly containing cli markup such as ``{.arg x}``.

    Returns
    -------
    str
        The message with markup spans and bare ``{...}`` braces replaced by
        their content.
    """
    text = message
    last = None
    # Neither pattern matches nested braces, so repeat until a pass changes
    # nothing; each pass peels off the innermost layer.
    while text != last:
        last = text
        text = _CLI_BRACE.sub(r"\1", _CLI_SPAN.sub(r"\1", text))
    return text
42
+
43
+
44
def cli_abort(message: str, error_class: Type[Exception] = ValueError) -> NoReturn:
    """Raise an exception, mirroring ``cli::cli_abort``.

    Parameters
    ----------
    message : str
        Error message; cli markup is stripped before raising.
    error_class : type[Exception]
        Exception type to raise (default ``ValueError``; pass ``TypeError``
        where the R error is about an input type).

    Raises
    ------
    Exception
        Always raises *error_class* with the plain-text message.
    """
    plain = strip_cli_markup(message)
    raise error_class(plain)
61
+
62
+
63
def cli_warn(message: str, category: Type[Warning] = UserWarning) -> None:
    """Emit a warning, mirroring ``cli::cli_warn``.

    Parameters
    ----------
    message : str
        Warning message; cli markup is stripped before it is issued.
    category : type[Warning]
        Warning category (default ``UserWarning``).
    """
    plain = strip_cli_markup(message)
    # stacklevel=2 attributes the warning to the caller of cli_warn.
    warnings.warn(plain, category, stacklevel=2)
74
+
75
+
76
def cli_inform(message: str) -> None:
    """Print an informational message, mirroring ``cli::cli_inform``.

    Parameters
    ----------
    message : str
        Message to print to standard output; cli markup is stripped first.
    """
    plain = strip_cli_markup(message)
    print(plain)
ggh4x/_datasets.py ADDED
@@ -0,0 +1,106 @@
1
+ """Dataset loaders for tutorials/tests (R source: base ``datasets`` + ggplot2 datasets).
2
+
3
+ ggh4x bundles no data; its vignettes use standard R datasets. This module loads the
4
+ base-R datasets (bundled as CSVs in ``ggh4x/resources/``) and re-exports the ggplot2
5
+ datasets from ``ggplot2_py`` so tutorial/validation code is self-contained and R-faithful.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from importlib import resources
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+
15
+ __all__ = [
16
+ "load_iris",
17
+ "load_mtcars",
18
+ "load_faithful",
19
+ "load_pressure",
20
+ "load_volcano",
21
+ "mpg",
22
+ "diamonds",
23
+ "economics",
24
+ ]
25
+
26
+
27
def _resource_path(filename: str):
    """Return the packaged resource handle for *filename* in ``ggh4x.resources``."""
    package_root = resources.files("ggh4x.resources")
    return package_root.joinpath(filename)
29
+
30
+
31
def load_iris() -> pd.DataFrame:
    """Load the ``iris`` dataset (150x5), mirroring base R.

    Returns
    -------
    pandas.DataFrame
        Columns ``Sepal.Length``, ``Sepal.Width``, ``Petal.Length``,
        ``Petal.Width`` and ``Species``; ``Species`` is converted to a
        categorical column.
    """
    # as_file yields a real filesystem path for the packaged CSV.
    with resources.as_file(_resource_path("iris.csv")) as csv_path:
        frame = pd.read_csv(csv_path)
    frame["Species"] = pd.Categorical(frame["Species"])
    return frame
44
+
45
+
46
def load_mtcars() -> pd.DataFrame:
    """Load the ``mtcars`` dataset (32x11), mirroring base R.

    Returns
    -------
    pandas.DataFrame
        The CSV's first column becomes the index, named ``model`` (matching
        R rownames); the remaining 11 columns are numeric.
    """
    with resources.as_file(_resource_path("mtcars.csv")) as csv_path:
        frame = pd.read_csv(csv_path, index_col=0)
    frame.index.name = "model"
    return frame
58
+
59
+
60
def load_faithful() -> pd.DataFrame:
    """Load the ``faithful`` dataset (272x2), mirroring base R.

    Returns
    -------
    pandas.DataFrame
        Columns ``eruptions`` and ``waiting``.
    """
    with resources.as_file(_resource_path("faithful.csv")) as csv_path:
        frame = pd.read_csv(csv_path)
    return frame
70
+
71
+
72
def load_pressure() -> pd.DataFrame:
    """Load the ``pressure`` dataset (19x2), mirroring base R.

    Returns
    -------
    pandas.DataFrame
        Columns ``temperature`` and ``pressure``.
    """
    with resources.as_file(_resource_path("pressure.csv")) as csv_path:
        frame = pd.read_csv(csv_path)
    return frame
82
+
83
+
84
def load_volcano() -> np.ndarray:
    """Load the ``volcano`` matrix (87x61), mirroring base R.

    Returns
    -------
    numpy.ndarray
        Topographic heights as a float array.
    """
    with resources.as_file(_resource_path("volcano.csv")) as csv_path:
        # The CSV is read without a header row: every line is data.
        grid = pd.read_csv(csv_path, header=None)
    return grid.to_numpy(dtype=float)
94
+
95
+
96
def _ggplot2_dataset(name: str) -> pd.DataFrame:
    """Fetch the dataset called *name* from ``ggplot2_py.datasets``.

    The import happens inside the function, so ``ggplot2_py`` is only loaded
    when a dataset is actually requested.
    """
    from ggplot2_py import datasets as ggplot2_datasets

    return getattr(ggplot2_datasets, name)
100
+
101
+
102
# ggplot2 datasets re-exported from ggplot2_py (loaded lazily on attribute access).
def __getattr__(name: str):  # pragma: no cover - thin re-export
    """Resolve ``mpg``, ``diamonds`` and ``economics`` on first attribute access."""
    if name not in ("mpg", "diamonds", "economics"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _ggplot2_dataset(name)
ggh4x/_download.py ADDED
@@ -0,0 +1,111 @@
1
+ """Transparent download and caching for remote data assets."""
2
+
3
+ import hashlib
4
+ import sys
5
+ import urllib.request
6
+ from pathlib import Path
7
+
8
+ from ._registry import CACHE_DIR_NAME, DATA_DIR_NAME, REGISTRY
9
+
10
+ __all__ = ["resolve_data_path"]
11
+
12
+ # <pkg>-python/<import_name>/_download.py → parent.parent = <pkg>-python/
13
+ _PKG_ROOT = Path(__file__).resolve().parent.parent
14
+
15
+
16
def resolve_data_path(filename: str) -> Path:
    """Resolve a remote data asset to a local file path.

    Resolution order:

    1. ``<work_dir>/<DATA_DIR_NAME>/<filename>`` — local staging copy
    2. ``~/.cache/<CACHE_DIR_NAME>/<filename>`` — previously downloaded
    3. Download from registry URL → save to cache

    Parameters
    ----------
    filename : str
        Filename as registered in ``REGISTRY``.

    Returns
    -------
    Path
        Path to the resolved local file.

    Raises
    ------
    FileNotFoundError
        If the file is not available locally and the registry has no entry
        or no download URL for it.
    """
    # 1. Local staging directory, a sibling of the package root.
    staged = _PKG_ROOT.parent / DATA_DIR_NAME / filename
    if staged.exists():
        return staged

    # 2. Per-user cache from an earlier download.
    cache_root = Path.home() / ".cache" / CACHE_DIR_NAME
    cached_copy = cache_root / filename
    if cached_copy.exists():
        return cached_copy

    # 3. Download; this needs a registry entry that carries a URL.
    if filename not in REGISTRY:
        raise FileNotFoundError(
            f"\'{filename}\' not found locally and not in registry.\n"
            f"Place it in: {staged}"
        )

    registry_entry = REGISTRY[filename]
    source_url = registry_entry.get("url")
    if not source_url:
        raise FileNotFoundError(
            f"\'{filename}\' has no download URL in registry.\n"
            f"Place it manually in: {staged}"
        )

    cache_root.mkdir(parents=True, exist_ok=True)
    _download(source_url, cached_copy)
    # Verification is skipped when the entry carries no "sha256" value.
    _verify_sha256(cached_copy, registry_entry.get("sha256"))
    return cached_copy
70
+
71
+
72
+ def _download(url: str, dest: Path) -> None:
73
+ """Stream-download *url* to *dest* with progress."""
74
+ print(f"Downloading {dest.name} …", file=sys.stderr, flush=True)
75
+ with urllib.request.urlopen(url) as resp:
76
+ total = int(resp.headers.get("Content-Length", 0))
77
+ received = 0
78
+ with open(dest, "wb") as fout:
79
+ while True:
80
+ chunk = resp.read(1 << 16) # 64 KiB
81
+ if not chunk:
82
+ break
83
+ fout.write(chunk)
84
+ received += len(chunk)
85
+ if total:
86
+ pct = received * 100 // total
87
+ print(
88
+ f"\r {received / 1e6:.1f}/{total / 1e6:.1f} MB ({pct}%)",
89
+ end="",
90
+ file=sys.stderr,
91
+ flush=True,
92
+ )
93
+ if total:
94
+ print(file=sys.stderr)
95
+
96
+
97
+ def _verify_sha256(path: Path, expected: str | None) -> None:
98
+ """Check SHA-256; delete file and raise on mismatch."""
99
+ if not expected:
100
+ return
101
+ h = hashlib.sha256()
102
+ with open(path, "rb") as f:
103
+ for chunk in iter(lambda: f.read(1 << 16), b""):
104
+ h.update(chunk)
105
+ actual = h.hexdigest()
106
+ if actual != expected:
107
+ path.unlink(missing_ok=True)
108
+ raise RuntimeError(
109
+ f"SHA-256 mismatch for {path.name}: "
110
+ f"expected {expected[:16]}…, got {actual[:16]}…"
111
+ )