ggh4x-python 0.3.1.9000__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. ggh4x/__init__.py +140 -0
  2. ggh4x/_aimed_text_grob.py +432 -0
  3. ggh4x/_borrowed_ggplot2.py +273 -0
  4. ggh4x/_cli.py +84 -0
  5. ggh4x/_datasets.py +106 -0
  6. ggh4x/_download.py +111 -0
  7. ggh4x/_facet_helpers.py +313 -0
  8. ggh4x/_facet_utils.py +649 -0
  9. ggh4x/_gap_grobs.py +606 -0
  10. ggh4x/_registry.py +10 -0
  11. ggh4x/_rlang.py +93 -0
  12. ggh4x/_utils.py +150 -0
  13. ggh4x/_vctrs.py +233 -0
  14. ggh4x/conveniences.py +601 -0
  15. ggh4x/coord_axes_inside.py +380 -0
  16. ggh4x/element_part_rect.py +545 -0
  17. ggh4x/facet_grid2.py +1018 -0
  18. ggh4x/facet_manual.py +901 -0
  19. ggh4x/facet_nested.py +776 -0
  20. ggh4x/facet_nested_wrap.py +193 -0
  21. ggh4x/facet_wrap2.py +896 -0
  22. ggh4x/geom_box.py +536 -0
  23. ggh4x/geom_outline_point.py +444 -0
  24. ggh4x/geom_pointpath.py +259 -0
  25. ggh4x/geom_polygonraster.py +252 -0
  26. ggh4x/geom_rectrug.py +489 -0
  27. ggh4x/geom_text_aimed.py +279 -0
  28. ggh4x/guide_stringlegend.py +354 -0
  29. ggh4x/help_secondary.py +549 -0
  30. ggh4x/multiscale/__init__.py +51 -0
  31. ggh4x/multiscale/_multiscale_add.py +207 -0
  32. ggh4x/multiscale/scale_listed.py +167 -0
  33. ggh4x/multiscale/scale_manual.py +478 -0
  34. ggh4x/multiscale/scale_multi.py +393 -0
  35. ggh4x/panel_scales/__init__.py +58 -0
  36. ggh4x/panel_scales/at_panel.py +115 -0
  37. ggh4x/panel_scales/facetted_pos_scales.py +647 -0
  38. ggh4x/panel_scales/force_panelsize.py +411 -0
  39. ggh4x/panel_scales/scale_facet.py +222 -0
  40. ggh4x/position_disjoint_ranges.py +229 -0
  41. ggh4x/position_lineartrans.py +242 -0
  42. ggh4x/py.typed +0 -0
  43. ggh4x/resources/faithful.csv +273 -0
  44. ggh4x/resources/iris.csv +151 -0
  45. ggh4x/resources/mtcars.csv +33 -0
  46. ggh4x/resources/pressure.csv +20 -0
  47. ggh4x/resources/volcano.csv +87 -0
  48. ggh4x/save.py +255 -0
  49. ggh4x/stat_difference.py +388 -0
  50. ggh4x/stat_funxy.py +436 -0
  51. ggh4x/stat_rle.py +290 -0
  52. ggh4x/stat_rollingkernel.py +369 -0
  53. ggh4x/stat_theodensity.py +681 -0
  54. ggh4x/strip_nested.py +448 -0
  55. ggh4x/strip_split.py +687 -0
  56. ggh4x/strip_tag.py +636 -0
  57. ggh4x/strip_themed.py +232 -0
  58. ggh4x/strip_vanilla.py +1464 -0
  59. ggh4x/themes.py +31 -0
  60. ggh4x/themes_ggh4x.py +67 -0
  61. ggh4x_python-0.3.1.9000.dist-info/METADATA +40 -0
  62. ggh4x_python-0.3.1.9000.dist-info/RECORD +64 -0
  63. ggh4x_python-0.3.1.9000.dist-info/WHEEL +4 -0
  64. ggh4x_python-0.3.1.9000.dist-info/licenses/LICENSE +3 -0
@@ -0,0 +1,681 @@
1
+ """Fitted theoretical density.
2
+
3
+ Port of ``ggh4x``'s ``stat_theodensity.R``. Estimates the parameters of a
4
+ named theoretical distribution by maximum likelihood and evaluates the
5
+ distribution's probability density (or mass) function on a grid. This is the
6
+ Python analogue of fitting a parametric distribution with
7
+ ``fitdistrplus::fitdist`` and then calling ``d<distri>`` from R's ``stats``
8
+ package.
9
+
10
+ R uses ``fitdistrplus::fitdist`` for maximum-likelihood estimation. The Python
11
+ port replaces this with :mod:`scipy.stats` MLE: continuous distributions use the
12
+ ``<dist>.fit`` method (with location/scale fixed where R's parameterization
13
+ fixes them), while discrete distributions (``pois``/``geom``/``binom``/
14
+ ``nbinom``) use closed-form or numerically optimized maximum-likelihood
15
+ estimators. A hand-built mapping table translates R distribution names and
16
+ parameterizations (e.g. R's ``gamma`` rate vs scipy's scale) into scipy
17
+ objects and parameters.
18
+
19
+ R source
20
+ --------
21
+ ``ggh4x/R/stat_theodensity.R``
22
+
23
+ Notes
24
+ -----
25
+ The single largest fidelity risk is that :func:`scipy.stats.<dist>.fit` returns
26
+ parameters in a different order and parameterization than R's ``d<distri>``
27
+ functions. The :data:`_DISTRI_TABLE` mapping captures, per R distribution name,
28
+ the scipy distribution object, the fixed-parameter constraints required to
29
+ reproduce R's MLE, and the conversion from the scipy fit tuple back into R's
30
+ named parameters. Verified against live ``fitdistrplus`` output on identical
31
+ data samples.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ from typing import Any, Callable, Dict, List, Optional, Tuple
37
+
38
+ import numpy as np
39
+ import pandas as pd
40
+
41
+ from ggplot2_py import ggproto_parent
42
+ from ggplot2_py.aes import AfterStat
43
+ from ggplot2_py.stat import StatDensity, _layer
44
+
45
+ from ._cli import cli_abort, cli_inform, cli_warn
46
+
47
+ __all__ = ["stat_theodensity", "StatTheoDensity", "_class_distri"]
48
+
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Distribution parameterization mapping table
52
+ # ---------------------------------------------------------------------------
53
+ #
54
+ # R's ``d<distri>`` functions and ``fitdistrplus`` use parameterizations that
55
+ # differ from :mod:`scipy.stats`. Each entry maps an R distribution name to:
56
+ # - ``kind`` : "continuous" or "discrete".
57
+ # - ``fitter`` : callable ``(x, fix_arg, start_arg) -> dict`` returning the
58
+ # fitted parameters in R's named parameterization (the same
59
+ # names ``coef(fitdist(...))`` would produce, plus any fixed
60
+ # parameters).
61
+ # - ``pdf`` : callable ``(xseq, params) -> ndarray`` evaluating the
62
+ # density / mass function using R's named parameters.
63
+ #
64
+ # This indirection reproduces ``get(paste0("d", distri))`` together with the
65
+ # ``coef(fitdistrplus::fitdist(...))`` call in the R source.
66
+
67
+
68
+ def _require_scipy() -> Any:
69
+ """Import :mod:`scipy.stats`, aborting with a helpful message if absent.
70
+
71
+ Returns
72
+ -------
73
+ module
74
+ The :mod:`scipy.stats` module.
75
+
76
+ Raises
77
+ ------
78
+ ImportError
79
+ If scipy is not installed.
80
+ """
81
+ try:
82
+ from scipy import stats as _stats # noqa: WPS433 (local import by design)
83
+ except ImportError as err: # pragma: no cover - environment dependent
84
+ raise ImportError(
85
+ "The 'scipy' package is required for `stat_theodensity()`."
86
+ ) from err
87
+ return _stats
88
+
89
+
90
+ # -- Continuous fitters ------------------------------------------------------
91
+
92
+
93
def _fit_norm(x, fix_arg, start_arg):
    """Fit a normal distribution and return R's ``mean`` / ``sd`` parameters.

    ``fix_arg`` and ``start_arg`` belong to the shared fitter signature and are
    not consulted here.
    """
    mean, sd = _require_scipy().norm.fit(x)
    return dict(mean=mean, sd=sd)
97
+
98
+
99
def _fit_lnorm(x, fix_arg, start_arg):
    """Fit a log-normal distribution with location fixed at zero.

    The scipy shape parameter is reported as ``sdlog`` and the logarithm of the
    scipy scale as ``meanlog``.
    """
    stats = _require_scipy()
    sdlog, _, exp_meanlog = stats.lognorm.fit(x, floc=0)
    meanlog = float(np.log(exp_meanlog))
    return {"meanlog": meanlog, "sdlog": sdlog}
103
+
104
+
105
def _fit_cauchy(x, fix_arg, start_arg):
    """Fit a Cauchy distribution and return ``location`` / ``scale``."""
    location, spread = _require_scipy().cauchy.fit(x)
    return dict(location=location, scale=spread)
109
+
110
+
111
def _fit_gamma(x, fix_arg, start_arg):
    """Fit a gamma distribution and return R's ``shape`` / ``rate`` parameters.

    Location is always fixed at zero. A ``rate`` entry in ``fix_arg`` is
    translated to a fixed scipy scale of ``1 / rate``; a ``shape`` entry fixes
    the scipy shape. Fixed values are echoed back unchanged in the result.
    """
    stats = _require_scipy()
    constraints = {"floc": 0}
    pinned = {}
    supplied = fix_arg or {}

    if "rate" in supplied:
        # R parameterizes by rate, scipy by scale = 1 / rate.
        constraints["fscale"] = 1.0 / float(supplied["rate"])
        pinned["rate"] = float(supplied["rate"])
    if "shape" in supplied:
        constraints["fa"] = float(supplied["shape"])
        pinned["shape"] = float(supplied["shape"])

    shape, _, scale = stats.gamma.fit(x, **constraints)
    # Fixed values take precedence over whatever the fit tuple reports.
    return {"shape": shape, "rate": 1.0 / scale, **pinned}
126
+
127
+
128
def _fit_weibull(x, fix_arg, start_arg):
    """Fit a Weibull (``weibull_min``) distribution with location fixed at zero."""
    fitted = _require_scipy().weibull_min.fit(x, floc=0)
    shape, scale = fitted[0], fitted[2]
    return dict(shape=shape, scale=scale)
132
+
133
+
134
def _fit_exp(x, fix_arg, start_arg):
    """Fit an exponential distribution (location zero) and return R's ``rate``."""
    stats = _require_scipy()
    fitted_scale = stats.expon.fit(x, floc=0)[1]
    # R uses rate, the reciprocal of scipy's scale.
    return dict(rate=1.0 / fitted_scale)
138
+
139
+
140
def _fit_logis(x, fix_arg, start_arg):
    """Fit a logistic distribution and return ``location`` / ``scale``."""
    location, spread = _require_scipy().logistic.fit(x)
    return dict(location=location, scale=spread)
144
+
145
+
146
def _fit_beta(x, fix_arg, start_arg):
    """Fit a beta distribution on the unit interval (loc 0, scale 1 fixed)."""
    fitted = _require_scipy().beta.fit(x, floc=0, fscale=1)
    shape1, shape2 = fitted[0], fitted[1]
    return dict(shape1=shape1, shape2=shape2)
150
+
151
+
152
+ def _fit_unif(x, fix_arg, start_arg):
153
+ return {"min": float(np.min(x)), "max": float(np.max(x))}
154
+
155
+
156
def _fit_t(x, fix_arg, start_arg):
    """Fit the degrees of freedom of a standard Student t distribution.

    Location and scale are fixed at 0 and 1 so that only ``df`` is estimated,
    matching the single-parameter form of R's ``dt``.
    """
    stats = _require_scipy()
    fitted = stats.t.fit(x, floc=0, fscale=1)
    return dict(df=fitted[0])
162
+
163
+
164
def _fit_f(x, fix_arg, start_arg):
    """Fit an F distribution (loc 0, scale 1 fixed) and return ``df1`` / ``df2``."""
    fitted = _require_scipy().f.fit(x, floc=0, fscale=1)
    df1, df2 = fitted[0], fitted[1]
    return dict(df1=df1, df2=df2)
168
+
169
+
170
def _fit_chisq(x, fix_arg, start_arg):
    """Fit the degrees of freedom of a chi-square distribution.

    ``StatTheoDensity.setup_params`` rewrites ``chisq`` to a gamma fit, so this
    fitter is only used when ``chisq`` is passed to ``compute_group`` directly.
    """
    stats = _require_scipy()
    fitted = stats.chi2.fit(x, floc=0, fscale=1)
    return dict(df=fitted[0])
176
+
177
+
178
+ # -- Discrete fitters --------------------------------------------------------
179
+
180
+
181
+ def _fit_pois(x, fix_arg, start_arg):
182
+ return {"lambda": float(np.mean(x))}
183
+
184
+
185
+ def _fit_geom(x, fix_arg, start_arg):
186
+ # R's ``dgeom`` counts failures before the first success; the MLE of the
187
+ # success probability is ``1 / (1 + mean)``.
188
+ return {"prob": 1.0 / (1.0 + float(np.mean(x)))}
189
+
190
+
191
+ def _fit_binom(x, fix_arg, start_arg):
192
+ # ``size`` must be fixed (R aborts/auto-fixes it in setup_params). The MLE
193
+ # of ``prob`` with fixed ``size`` is ``mean(x) / size``.
194
+ if not fix_arg or "size" not in fix_arg:
195
+ cli_abort("Fitting a binomial distribution requires a fixed 'size'.")
196
+ size = int(fix_arg["size"])
197
+ return {"size": size, "prob": float(np.mean(x)) / size}
198
+
199
+
200
def _fit_nbinom(x, fix_arg, start_arg):
    """Fit a negative binomial by numerical maximum likelihood over (size, mu).

    The likelihood is evaluated with scipy's ``nbinom`` using
    ``prob = size / (size + mu)``. Optimization starts from a
    method-of-moments guess for ``size`` (falling back to 1.0) and the sample
    mean for ``mu``, and uses Nelder-Mead.

    Returns
    -------
    dict
        ``{"size": float, "mu": float}``.
    """
    stats = _require_scipy()
    from scipy import optimize  # noqa: WPS433

    data = np.asarray(x, dtype=float)
    sample_mean = float(np.mean(data))
    sample_var = float(np.var(data))

    # Moment-based start for ``size``; only usable when the data are
    # over-dispersed and the resulting value is finite and positive.
    start_size = 1.0
    if sample_var > sample_mean:
        candidate = sample_mean * sample_mean / (sample_var - sample_mean)
        if np.isfinite(candidate) and candidate > 0:
            start_size = candidate

    def negative_loglik(theta: np.ndarray) -> float:
        k, mean = theta
        # Large penalty keeps the simplex inside the valid parameter region.
        if k <= 0 or mean <= 0:
            return 1e10
        success_prob = k / (k + mean)
        return -float(np.sum(stats.nbinom.logpmf(data, k, success_prob)))

    solver_options = {"xatol": 1e-8, "fatol": 1e-10, "maxiter": 10000}
    result = optimize.minimize(
        negative_loglik,
        np.array([start_size, sample_mean]),
        method="Nelder-Mead",
        options=solver_options,
    )
    fitted_size, fitted_mu = result.x
    return dict(size=float(fitted_size), mu=float(fitted_mu))
228
+
229
+
230
+ # -- PDF / PMF evaluators (R parameterization) -------------------------------
231
+
232
+
233
def _pdf_norm(xseq, p):
    """Evaluate the normal density at ``xseq`` from R's ``mean`` / ``sd``."""
    normal = _require_scipy().norm
    return normal.pdf(xseq, loc=p["mean"], scale=p["sd"])
235
+
236
+
237
def _pdf_lnorm(xseq, p):
    """Evaluate the log-normal density from R's ``meanlog`` / ``sdlog``."""
    lognormal = _require_scipy().lognorm
    # scipy's scale is exp(meanlog); its shape is sdlog.
    return lognormal.pdf(xseq, p["sdlog"], loc=0, scale=np.exp(p["meanlog"]))
241
+
242
+
243
def _pdf_cauchy(xseq, p):
    """Evaluate the Cauchy density from ``location`` / ``scale``."""
    cauchy = _require_scipy().cauchy
    return cauchy.pdf(xseq, loc=p["location"], scale=p["scale"])
245
+
246
+
247
def _pdf_gamma(xseq, p):
    """Evaluate the gamma density from R's ``shape`` / ``rate``."""
    gamma = _require_scipy().gamma
    # scipy takes scale, the reciprocal of R's rate.
    return gamma.pdf(xseq, p["shape"], loc=0, scale=1.0 / p["rate"])
249
+
250
+
251
def _pdf_weibull(xseq, p):
    """Evaluate the Weibull (``weibull_min``) density from ``shape`` / ``scale``."""
    weibull = _require_scipy().weibull_min
    return weibull.pdf(xseq, p["shape"], loc=0, scale=p["scale"])
253
+
254
+
255
def _pdf_exp(xseq, p):
    """Evaluate the exponential density from R's ``rate``."""
    exponential = _require_scipy().expon
    return exponential.pdf(xseq, loc=0, scale=1.0 / p["rate"])
257
+
258
+
259
def _pdf_logis(xseq, p):
    """Evaluate the logistic density from ``location`` / ``scale``."""
    logistic = _require_scipy().logistic
    return logistic.pdf(xseq, loc=p["location"], scale=p["scale"])
261
+
262
+
263
def _pdf_beta(xseq, p):
    """Evaluate the beta density on the unit interval from ``shape1`` / ``shape2``."""
    beta = _require_scipy().beta
    return beta.pdf(xseq, p["shape1"], p["shape2"], loc=0, scale=1)
265
+
266
+
267
def _pdf_unif(xseq, p):
    """Evaluate the uniform density from R's ``min`` / ``max``.

    scipy's ``uniform`` is parameterized by ``loc`` (lower bound) and ``scale``
    (width), so the width ``max - min`` is passed as the scale.
    """
    lower = p["min"]
    upper = p["max"]
    uniform = _require_scipy().uniform
    return uniform.pdf(xseq, loc=lower, scale=upper - lower)
270
+
271
+
272
def _pdf_t(xseq, p):
    """Evaluate the standard Student t density from ``df``."""
    student_t = _require_scipy().t
    return student_t.pdf(xseq, p["df"])
274
+
275
+
276
def _pdf_f(xseq, p):
    """Evaluate the F density from ``df1`` / ``df2``."""
    f_dist = _require_scipy().f
    return f_dist.pdf(xseq, p["df1"], p["df2"])
278
+
279
+
280
def _pdf_chisq(xseq, p):
    """Evaluate the chi-square density from ``df``."""
    chi_square = _require_scipy().chi2
    return chi_square.pdf(xseq, p["df"])
282
+
283
+
284
def _pmf_pois(xseq, p):
    """Evaluate the Poisson mass function from ``lambda``."""
    poisson = _require_scipy().poisson
    return poisson.pmf(xseq, p["lambda"])
286
+
287
+
288
def _pmf_geom(xseq, p):
    """Evaluate the geometric mass function in R's failures-counting convention.

    scipy's ``geom`` is supported on 1, 2, ..., whereas R's ``dgeom`` starts at
    0, so the evaluation points are shifted up by one.
    """
    pmf = _require_scipy().geom.pmf
    return pmf(xseq + 1, p["prob"])
290
+
291
+
292
def _pmf_binom(xseq, p):
    """Evaluate the binomial mass function from ``size`` (cast to int) and ``prob``."""
    binomial = _require_scipy().binom
    return binomial.pmf(xseq, int(p["size"]), p["prob"])
294
+
295
+
296
def _pmf_nbinom(xseq, p):
    """Evaluate the negative binomial mass function from ``size`` / ``mu``.

    The (size, mu) pair is converted to scipy's (n, p) form with
    ``prob = size / (size + mu)``.
    """
    dispersion = p["size"]
    mean = p["mu"]
    success_prob = dispersion / (dispersion + mean)
    return _require_scipy().nbinom.pmf(xseq, dispersion, success_prob)
300
+
301
+
302
# Each row is (R name, kind, fitter, density/mass evaluator); the comprehension
# expands it into the ``{"kind", "fitter", "pdf"}`` record used for lookups.
_DISTRI_TABLE: Dict[str, Dict[str, Any]] = {
    name: {"kind": kind, "fitter": fitter, "pdf": pdf}
    for name, kind, fitter, pdf in (
        ("norm", "continuous", _fit_norm, _pdf_norm),
        ("lnorm", "continuous", _fit_lnorm, _pdf_lnorm),
        ("cauchy", "continuous", _fit_cauchy, _pdf_cauchy),
        ("gamma", "continuous", _fit_gamma, _pdf_gamma),
        ("weibull", "continuous", _fit_weibull, _pdf_weibull),
        ("exp", "continuous", _fit_exp, _pdf_exp),
        ("logis", "continuous", _fit_logis, _pdf_logis),
        ("beta", "continuous", _fit_beta, _pdf_beta),
        ("unif", "continuous", _fit_unif, _pdf_unif),
        ("t", "continuous", _fit_t, _pdf_t),
        ("f", "continuous", _fit_f, _pdf_f),
        ("chisq", "continuous", _fit_chisq, _pdf_chisq),
        ("pois", "discrete", _fit_pois, _pmf_pois),
        ("geom", "discrete", _fit_geom, _pmf_geom),
        ("binom", "discrete", _fit_binom, _pmf_binom),
        ("nbinom", "discrete", _fit_nbinom, _pmf_nbinom),
    )
}

#: Distribution names that ``stat_theodensity`` rejects outright (mirrors R).
_UNSUPPORTED = ("multinom", "hyper", "wilcox", "signrank")
323
+
324
+
325
+ # ---------------------------------------------------------------------------
326
+ # Helper: classify a distribution as discrete or continuous
327
+ # ---------------------------------------------------------------------------
328
+
329
+
330
+ def _class_distri(distri: str) -> str:
331
+ """Classify a distribution name as ``"discrete"`` or ``"continuous"``.
332
+
333
+ Port of ``class_distri`` (``stat_theodensity.R``). R first checks fixed
334
+ discrete/continuous name sets and only falls back to an empirical
335
+ ``r<distri>`` probe for user-defined distributions in the calling
336
+ environment. The Python port supports the built-in distributions of the
337
+ mapping table; unknown names raise.
338
+
339
+ Parameters
340
+ ----------
341
+ distri : str
342
+ Distribution name without the ``d``/``r``/``p``/``q`` prefix.
343
+
344
+ Returns
345
+ -------
346
+ str
347
+ ``"discrete"`` or ``"continuous"``.
348
+
349
+ Raises
350
+ ------
351
+ ValueError
352
+ If the distribution cannot be classified.
353
+ """
354
+ discrete_distris = (
355
+ "pois",
356
+ "nbinom",
357
+ "binom",
358
+ "geom",
359
+ "hyper",
360
+ "signrank",
361
+ "multinom",
362
+ "wilcox",
363
+ )
364
+ if distri in discrete_distris:
365
+ return "discrete"
366
+
367
+ conti_distris = (
368
+ "beta",
369
+ "cauchy",
370
+ "chisq",
371
+ "exp",
372
+ "f",
373
+ "gamma",
374
+ "lnorm",
375
+ "norm",
376
+ "t",
377
+ "unif",
378
+ "weibull",
379
+ "logis",
380
+ )
381
+ if distri in conti_distris:
382
+ return "continuous"
383
+
384
+ # R performs an empirical probe of a user-supplied ``r<distri>`` function in
385
+ # the calling environment. That is out of scope for the port; abort like R
386
+ # would when it cannot determine the type.
387
+ cli_abort(
388
+ f"`stat_theodensity()` failed to determine if the '{distri}' "
389
+ "distribution is discrete or continuous."
390
+ )
391
+
392
+
393
+ # ---------------------------------------------------------------------------
394
+ # ggproto
395
+ # ---------------------------------------------------------------------------
396
+
397
+
398
class StatTheoDensity(StatDensity):
    """Fit a theoretical distribution by MLE and evaluate its density.

    Subclass of ``StatDensity`` whose ``compute_group`` fits a named
    theoretical distribution (looked up in ``_DISTRI_TABLE``) and evaluates
    its probability density (continuous) or mass (discrete) function on a
    grid. Default aesthetics are not redefined here and come from the parent
    class.
    """

    extra_params: List[str] = ["na_rm", "orientation"]

    def compute_group(
        self,
        data: pd.DataFrame,
        scales: Any,
        distri: str = "norm",
        n: int = 512,
        distri_type: str = "continuous",
        fix_arg: Optional[Dict[str, Any]] = None,
        start_arg: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> pd.DataFrame:
        """Fit ``distri`` to ``data['x']`` and evaluate its density on a grid.

        Parameters
        ----------
        data : pandas.DataFrame
            Must contain an ``x`` column.
        scales : dict-like or object
            Panel scales. The ``x`` scale's ``dimension()`` supplies the
            evaluation range; when no such scale is available the range of the
            non-missing data is used.
        distri : str, default ``"norm"``
            Distribution name (without prefix); a key of ``_DISTRI_TABLE``.
        n : int, default 512
            Number of equally spaced evaluation points (continuous only).
        distri_type : str, default ``"continuous"``
            ``"discrete"`` evaluates on consecutive integers covering the
            range; anything else evaluates on an ``n``-point linear grid.
        fix_arg : dict, optional
            Fixed parameters in R parameterization, passed to the fitter.
        start_arg : dict, optional
            Starting parameters, passed to the fitter.

        Returns
        -------
        pandas.DataFrame
            Columns ``x``, ``density``, ``scaled``, ``count``, ``n`` on
            success. On failure (fewer than two rows, no non-missing values,
            or an estimation failure) a single all-NaN row with columns ``x``,
            ``density``, ``scaled``, ``ndensity``, ``count``, ``n``.
        """
        _require_scipy()

        # Data to return upon failure (mirrors R's ``nulldata``). ``scaled`` is
        # included in addition to R's ``ndensity`` so that every column of the
        # success frame also exists when a group fails.
        nulldata = pd.DataFrame(
            {
                "x": [np.nan],
                "density": [np.nan],
                "scaled": [np.nan],
                "ndensity": [np.nan],
                "count": [np.nan],
                "n": [np.nan],
            }
        )

        entry = _DISTRI_TABLE.get(distri)
        if entry is None:
            cli_abort(
                "The `distri` argument must have a valid density function "
                f"called `d{distri}`."
            )

        x_all = np.asarray(data["x"].to_numpy(), dtype=float)
        x = x_all[~np.isnan(x_all)]
        nx = len(data["x"])  # R uses length(data$x), i.e. including NA rows.

        if nx < 2:
            cli_warn("Groups with fewer than two data points have been dropped.")
            return nulldata

        if x.size == 0:
            # Nothing to fit; bail out before the data-range fallback below
            # would produce a NaN range and break the grid construction.
            cli_warn(f"Failed to estimate parameters of '{distri}' distribution.")
            return nulldata

        scale = (
            scales.get("x")
            if isinstance(scales, dict)
            else getattr(scales, "x", None)
        )
        if scale is not None and hasattr(scale, "dimension"):
            rng = tuple(scale.dimension())
        else:
            rng = (float(np.min(x)), float(np.max(x)))

        if distri_type == "discrete":
            xseq = np.arange(np.floor(rng[0]), np.ceil(rng[1]) + 1, 1.0)
        else:
            xseq = np.linspace(rng[0], rng[1], n)

        # Maximum-likelihood estimation (replaces fitdistrplus::fitdist).
        try:
            params = entry["fitter"](x, fix_arg, start_arg)
        except Exception:  # noqa: BLE001 - any estimation failure -> nulldata
            cli_warn(f"Failed to estimate parameters of '{distri}' distribution.")
            return nulldata

        par_values = np.asarray(list(params.values()), dtype=float)
        if par_values.size == 0 or not np.all(np.isfinite(par_values)):
            cli_warn(f"Failed to estimate parameters of '{distri}' distribution.")
            return nulldata

        dens = np.asarray(entry["pdf"](xseq, params), dtype=float)

        # ``np.nanmax`` raises on an empty array and warns on an all-NaN one;
        # in both cases there is no meaningful maximum to scale by.
        has_values = dens.size > 0 and not np.all(np.isnan(dens))
        dens_max = np.nanmax(dens) if has_values else np.nan
        with np.errstate(divide="ignore", invalid="ignore"):
            scaled = dens / dens_max

        return pd.DataFrame(
            {
                "x": xseq,
                "density": dens,
                "scaled": scaled,
                "count": dens * nx,
                "n": nx,
            }
        )

    def setup_params(
        self, data: pd.DataFrame, params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Classify the distribution and apply R's parameter remaps.

        Parameters
        ----------
        data : pandas.DataFrame
            Layer data; ``data['x']`` is inspected for integrality.
        params : dict
            Stat parameters (``distri``, ``fix_arg``, ``start_arg``, ...). The
            input dict is not mutated; a copy is returned.

        Returns
        -------
        dict
            Updated parameters with ``distri_type`` injected and the ``chisq``
            / ``binom`` remaps applied.

        Notes
        -----
        ``cli_abort`` is called when a discrete distribution is requested for
        data containing non-integer values.
        """
        distri = params.get("distri", "norm")
        dtype = _class_distri(distri)

        x = np.asarray(data["x"].to_numpy(), dtype=float)
        x = x[~np.isnan(x)]

        if dtype == "discrete":
            if float(np.sum(np.abs(np.mod(x, 1)))) > 0:
                cli_abort(
                    f"A discrete '{distri}' distribution cannot be fitted "
                    "to continuous data."
                )
        params = dict(params)
        params["distri_type"] = dtype

        # Chi square estimator causes trouble; estimate as gamma with rate=0.5.
        if params.get("distri") == "chisq":
            params["distri"] = "gamma"
            fix_arg = params.get("fix_arg")
            # A missing ``df`` means the shape is still to be estimated.
            df = fix_arg.get("df") if fix_arg else None
            if df is None:
                params["fix_arg"] = {"rate": 0.5}
            else:
                params["fix_arg"] = {
                    "shape": float(df) / 2.0,
                    "rate": 0.5,
                }

        # Binomial does not operate without a fixed size. With no non-missing
        # data there is nothing to derive it from; the fit then fails inside
        # ``compute_group`` and yields the failure frame instead of crashing.
        if params.get("distri") == "binom" and x.size:
            x_max = float(np.max(x))
            if params.get("fix_arg") is None:
                params["fix_arg"] = {"size": int(x_max)}
                cli_inform(
                    "Estimating binomial PMF with size set to maximum data value."
                )
            # All-zero data has a maximum of 0; use 0.0 rather than dividing
            # by zero.
            prob0 = float(np.mean(x)) / x_max if x_max != 0 else 0.0
            params["start_arg"] = {"prob": prob0}

        return params
587
+
588
+
589
+ # ---------------------------------------------------------------------------
590
+ # Constructor
591
+ # ---------------------------------------------------------------------------
592
+
593
+
594
def stat_theodensity(
    mapping: Optional[Any] = None,
    data: Any = None,
    geom: str = "line",
    position: str = "identity",
    *,
    distri: str = "norm",
    n: int = 512,
    fix_arg: Optional[Dict[str, Any]] = None,
    start_arg: Optional[Dict[str, Any]] = None,
    na_rm: bool = True,
    show_legend: Optional[bool] = None,
    inherit_aes: bool = True,
    **kwargs: Any,
) -> Any:
    """Construct a fitted-theoretical-density layer.

    Estimates the parameters of ``distri`` by maximum likelihood and evaluates
    its probability density function, useful for comparing histograms or kernel
    density estimates against a theoretical distribution.

    Parameters
    ----------
    mapping : aes, optional
        Aesthetic mapping.
    data : DataFrame or callable, optional
        Layer data.
    geom : str, default ``"line"``
        Geometry used to render the layer.
    position : str, default ``"identity"``
        Position adjustment.
    distri : str, default ``"norm"``
        Distribution name without prefix (e.g. ``"norm"``, ``"nbinom"``). See
        :data:`_DISTRI_TABLE` for supported names.
    n : int, default 512
        Number of equally spaced evaluation points (ignored for discrete
        distributions).
    fix_arg : dict, optional
        Fixed parameters of the named distribution (R parameterization).
    start_arg : dict, optional
        Starting parameters for the estimation.
    na_rm : bool, default True
        Forwarded to the layer as the ``na_rm`` parameter.
    show_legend : bool, optional
        Whether to show this layer in the legend.
    inherit_aes : bool, default True
        Whether to inherit aesthetics from the plot.
    **kwargs
        Additional parameters forwarded to the layer.

    Returns
    -------
    Layer
        The object returned by ``_layer`` for :class:`StatTheoDensity`.

    Notes
    -----
    ``cli_abort`` is called when ``distri`` names an explicitly unsupported
    distribution (``multinom``/``hyper``/``wilcox``/``signrank``) or is not a
    key of :data:`_DISTRI_TABLE`.
    """
    # Check the explicitly unsupported names first: none of them appear in
    # ``_DISTRI_TABLE``, so testing table membership first would always report
    # the generic "valid density function" error and never this one.
    if distri in _UNSUPPORTED:
        cli_abort(
            f"`stat_theodensity()` does not support the '{distri}' distribution."
        )
    if distri not in _DISTRI_TABLE:
        cli_abort(
            "The `distri` argument must have a valid density function "
            f"called `d{distri}`."
        )

    return _layer(
        stat=StatTheoDensity,
        geom=geom,
        data=data,
        mapping=mapping,
        position=position,
        show_legend=show_legend,
        inherit_aes=inherit_aes,
        params={
            "distri": distri,
            "n": n,
            "fix_arg": fix_arg,
            "start_arg": start_arg,
            "na_rm": na_rm,
            **kwargs,
        },
    )