onekit 3.0.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: onekit
3
- Version: 3.0.0
3
+ Version: 3.1.0
4
4
  Summary: All-in-One Python Kit.
5
5
  License: BSD 3-Clause
6
6
  Keywords: onekit
@@ -22,7 +22,7 @@ requires-python = ">=3.11"
22
22
  dependencies = []
23
23
 
24
24
  [tool.poetry]
25
- version = "3.0.0"
25
+ version = "3.1.0"
26
26
 
27
27
  [project.optional-dependencies]
28
28
  base = [
@@ -58,7 +58,7 @@ sphinx-copybutton = "^0.5.2"
58
58
  time-machine = "^2.16.0"
59
59
 
60
60
  [tool.poetry.group.packaging.dependencies]
61
- python-semantic-release = "^8.3.0"
61
+ python-semantic-release = "8.3.0"
62
62
 
63
63
  [tool.black]
64
64
  line-length = 88
@@ -8,6 +8,7 @@ from onekit import mathkit as mk
8
8
  __all__ = (
9
9
  "check_random_state",
10
10
  "check_vector",
11
+ "create_boolean_array",
11
12
  "digitscale",
12
13
  "stderr",
13
14
  )
@@ -91,6 +92,25 @@ def check_vector(x: ArrayLike, /, *, n_min: int = 1, n_max: int = np.inf) -> Vec
91
92
  return x
92
93
 
93
94
 
95
+ # noinspection PyTypeChecker
96
+ def create_boolean_array(data: ArrayLike, pos_label: int | str) -> np.ndarray:
97
+ """Returns a boolean array indicating positions of pos_label in input data.
98
+
99
+ Examples
100
+ --------
101
+ >>> from onekit import numpykit as npk
102
+ >>> data = [0, 1, 2, 1, 0, 1]
103
+ >>> npk.create_boolean_array(data, pos_label=1)
104
+ array([False, True, False, True, False, True])
105
+
106
+ >>> data = ["cat", "dog", "cat", "bird", "cat", "dog"]
107
+ >>> npk.create_boolean_array(data, pos_label="dog")
108
+ array([False, True, False, False, False, True])
109
+ """
110
+ data_array = np.asarray(data)
111
+ return data_array == pos_label
112
+
113
+
94
114
  def digitscale(x: ArrayLike, /, *, kind: str = "log") -> np.ndarray:
95
115
  """NumPy version of digitscale.
96
116
 
@@ -273,8 +273,8 @@ def coinflip(bias: float, /, *, seed: Seed = None) -> bool:
273
273
  return rng.random() < bias
274
274
 
275
275
 
276
- def concat_strings(sep: str, /, *strings: str | Iterable[str]) -> str:
277
- """Concatenate strings.
276
+ def concat_strings(sep: str, /, *strings: str | None | Iterable[str | None]) -> str:
277
+ """Concatenate strings, excluding None values.
278
278
 
279
279
  Examples
280
280
  --------
@@ -296,7 +296,14 @@ def concat_strings(sep: str, /, *strings: str | Iterable[str]) -> str:
296
296
  >>> list(map(ws_concat, [["Hello", "World"], ["Hi", "there"]]))
297
297
  ['Hello World', 'Hi there']
298
298
  """
299
- return sep.join(toolz.pipe(strings, flatten, curried.map(str)))
299
+ return sep.join(
300
+ toolz.pipe(
301
+ strings,
302
+ flatten,
303
+ curried.filter(lambda x: x is not None),
304
+ curried.map(str),
305
+ )
306
+ )
300
307
 
301
308
 
302
309
  def contrast_sets(x: set, y: set, /, *, n: int = 3) -> dict:
@@ -0,0 +1,126 @@
1
+ import operator
2
+ from typing import (
3
+ Iterable,
4
+ NamedTuple,
5
+ )
6
+
7
+ from scipy import (
8
+ optimize,
9
+ stats,
10
+ )
11
+
12
+ from onekit import numpykit as npk
13
+ from onekit import pythonkit as pk
14
+
15
+ __all__ = (
16
+ "BetaParams",
17
+ "compute_beta_posterior",
18
+ )
19
+
20
+
21
+ class BetaParams(NamedTuple):
22
+ """Represents the parameters of a Beta distribution."""
23
+
24
+ alpha: int | float = 1
25
+ beta: int | float = 1
26
+
27
+ @property
28
+ def mean(self) -> float:
29
+ """Compute the mean of the Beta distribution."""
30
+ return self.alpha / (self.alpha + self.beta)
31
+
32
+ @property
33
+ def mode(self) -> float | None:
34
+ """Compute the mode of the Beta distribution.
35
+
36
+ Note that the mode is undefined for alpha <= 1 or beta <= 1.
37
+ """
38
+ if self.alpha > 1 and self.beta > 1:
39
+ return (self.alpha - 1) / (self.alpha + self.beta - 2)
40
+
41
+ def hdi(self, hdi_prob: float = 0.95) -> tuple[float, float] | None:
42
+ """Compute the highest density interval (HDI) of the Beta distribution.
43
+
44
+ Note that the HDI is not computed for alpha <= 1 or beta <= 1.
45
+ """
46
+ if self.alpha > 1 and self.beta > 1:
47
+ beta_dist = stats.beta(self.alpha, self.beta)
48
+ tail_prob = 1 - hdi_prob
49
+
50
+ def interval_width(x: float) -> float:
51
+ return beta_dist.ppf(hdi_prob + x) - beta_dist.ppf(x)
52
+
53
+ hdi_tail_prob = operator.getitem(
54
+ optimize.fmin(interval_width, tail_prob, ftol=1e-12, disp=False),
55
+ 0,
56
+ )
57
+ hdi_endpoints = beta_dist.ppf([hdi_tail_prob, hdi_prob + hdi_tail_prob])
58
+ return hdi_endpoints[0], hdi_endpoints[1]
59
+
60
+ def get_summary(self, hdi_prob: float = 0.95) -> str:
61
+ """Compute summary statistics of the Beta distribution."""
62
+ mode = self.mode
63
+ mode_info = f"mode={pk.num_to_str(mode)}" if mode is not None else None
64
+
65
+ hdi_info = None
66
+ hdi_endpoints = self.hdi(hdi_prob)
67
+ if hdi_endpoints is not None:
68
+ hdi_pct = pk.num_to_str(100 * hdi_prob)
69
+ hdi_lower_endpoint, hdi_upper_endpoint = map(pk.num_to_str, hdi_endpoints)
70
+ hdi_info = f"{hdi_pct}%-HDI=[{hdi_lower_endpoint}, {hdi_upper_endpoint}]"
71
+
72
+ return pk.concat_strings(
73
+ " ",
74
+ f"{self} ->",
75
+ f"mean={pk.num_to_str(self.mean)}",
76
+ mode_info,
77
+ hdi_info,
78
+ )
79
+
80
+
81
def compute_beta_posterior(
    data: Iterable[int | str],
    prior: BetaParams | None = None,
    pos_label: int | str = 1,
) -> BetaParams:
    """Update Beta prior with observed binomial data to compute posterior.

    This function applies Bayesian inference to update the parameters of a Beta
    distribution, given observed binomial data. The Beta distribution is commonly used
    as a prior in binomial proportion estimation due to its conjugacy, simplifying the
    calculation of the posterior.

    Parameters
    ----------
    data : Iterable[int | str]
        Observed outcomes. One-shot iterators (e.g. generators) are accepted
        and are consumed once.
    prior : BetaParams, optional
        Prior parameters. ``BetaParams()`` is used when omitted.
    pos_label : int | str, default=1
        Value in ``data`` that counts as a success.

    Returns
    -------
    BetaParams
        Posterior with ``alpha = prior.alpha + successes`` and
        ``beta = prior.beta + failures``. The counts are plain Python ints.

    Examples
    --------
    >>> from onekit import scipykit as sck
    >>> from onekit.scipykit import BetaParams
    >>> data = [1, 0, 1, 1, 0]
    >>> posterior = sck.compute_beta_posterior(data)
    >>> posterior.get_summary()
    'BetaParams(alpha=4, beta=3) -> mean=0.571429 mode=0.6 95%-HDI=[0.238706, 0.895169]'

    >>> data = ["head", "tail", "head", "head", "tail", "head", "head", "tail"]
    >>> prior = BetaParams(alpha=2, beta=2)
    >>> posterior = sck.compute_beta_posterior(data, prior, pos_label="head")
    >>> posterior.get_summary()
    'BetaParams(alpha=7, beta=5) -> mean=0.583333 mode=0.6 95%-HDI=[0.318232, 0.841428]'

    >>> data = [1, 0, 1, 1, 0]
    >>> prior = BetaParams(alpha=1, beta=1)
    >>> posterior1 = sck.compute_beta_posterior(data, prior)
    >>> posterior1.get_summary()
    'BetaParams(alpha=4, beta=3) -> mean=0.571429 mode=0.6 95%-HDI=[0.238706, 0.895169]'
    >>> more_data = [1, 0, 1, 0, 1]
    >>> posterior2 = sck.compute_beta_posterior(more_data, prior=posterior1)
    >>> posterior2.get_summary()
    'BetaParams(alpha=7, beta=5) -> mean=0.583333 mode=0.6 95%-HDI=[0.318232, 0.841428]'
    """
    # Local import keeps this block self-contained.
    from collections.abc import Iterator

    if prior is None:
        prior = BetaParams()

    # Array conversion cannot read items out of a one-shot iterator, so
    # materialize it before building the success mask.
    if isinstance(data, Iterator):
        data = list(data)

    y = npk.create_boolean_array(data, pos_label)

    # Convert NumPy integers to built-in ints: NumPy >= 2 renders its scalars
    # as e.g. ``np.int64(4)``, which would otherwise leak into the repr of the
    # returned BetaParams (and therefore into ``get_summary()``).
    num_successes = int(y.sum())
    # ``size`` counts every observation, also for multi-dimensional input,
    # where ``len`` would only count rows.
    num_trials = int(y.size)

    return BetaParams(
        alpha=prior.alpha + num_successes,
        beta=prior.beta + num_trials - num_successes,
    )
@@ -11,9 +11,13 @@ from matplotlib.axes import Axes
11
11
  from matplotlib.figure import Figure
12
12
  from mpl_toolkits.axes_grid1 import make_axes_locatable
13
13
  from mpl_toolkits.mplot3d.axes3d import Axes3D
14
+ from scipy import (
15
+ stats,
16
+ )
14
17
 
15
18
  from onekit import numpykit as npk
16
19
  from onekit import pythonkit as pk
20
+ from onekit.scipykit import BetaParams
17
21
 
18
22
  ArrayLike = npt.ArrayLike
19
23
 
@@ -23,6 +27,7 @@ __all__ = (
23
27
  "create_xy_points",
24
28
  "create_xyz_points",
25
29
  "discrete_cmap",
30
+ "plot_beta_distribution",
26
31
  "plot_contour",
27
32
  "plot_digitscale",
28
33
  "plot_line",
@@ -432,6 +437,128 @@ def discrete_cmap(
432
437
  return [cmap(i) for i in np.linspace(lower_bound, upper_bound, num=n)]
433
438
 
434
439
 
440
def plot_beta_distribution(
    alpha: int | float,
    beta: int | float,
    hdi_prob: float = 0.95,
    n_xvalues: int = 1001,
    ax=None,
) -> Axes:
    """Plot the density of a Beta(alpha, beta) distribution and mark its HDI.

    The density curve is drawn over [0, 1]. When an HDI is available, the area
    under the curve inside the interval is shaded, both endpoints are labeled
    and the mode is marked; otherwise the whole area under the curve is
    shaded. The mean is always marked and a legend is added.

    See Also
    --------
    onekit.scipykit.BetaParams : Beta parameters

    Examples
    --------
    >>> from onekit import vizkit as vk
    >>> vk.plot_beta_distribution(alpha=2, beta=2)  # doctest: +SKIP
    """
    ax = ax or plt.gca()
    params = BetaParams(alpha, beta)

    def beta_density(x: float) -> float:
        return stats.beta.pdf(x, params.alpha, params.beta)

    curve = FunctionPlotter(
        beta_density,
        [(0, 1)],
        n_xvalues=n_xvalues,
        kws_plot={"zorder": 1, "alpha": 0.7},
    )
    _, ax, _ = curve.plot(ax=ax)

    # The plotter keeps its evaluated points on a protected attribute; reuse
    # them so the shaded region matches the drawn curve.
    grid = getattr(curve, "_xyz_pts")
    x_values = grid.x
    densities = grid.y.ravel()

    shade_style = {"color": "skyblue", "alpha": 0.5, "zorder": 0}
    interval = params.hdi(hdi_prob)

    if interval is None:
        # No HDI available: shade the entire area under the curve.
        ax.fill_between(x_values, densities, **shade_style)
    else:
        lower, upper = interval
        inside_hdi = np.logical_and(x_values >= lower, x_values <= upper)
        ax.fill_between(x_values, densities, where=inside_hdi, **shade_style)

        lower_density = beta_density(lower)
        upper_density = beta_density(upper)

        # Label each endpoint and drop a dashed vertical line down to the axis.
        pad = 1.2
        for endpoint, density in ((lower, lower_density), (upper, upper_density)):
            ax.text(endpoint, pad * density, f"{endpoint:.3f}", ha="center")
            ax.plot(
                [endpoint, endpoint],
                [0, density],
                color="black",
                linestyle="--",
            )

        # Connect the two endpoints on the curve.
        ax.plot(
            [lower, upper],
            [lower_density, upper_density],
            color="black",
            linewidth=2,
        )

        # Caption the interval above its midpoint.
        ax.text(
            float(np.mean([lower, upper])),
            1.25 * pad * lower_density,
            f"{pk.num_to_str(100 * hdi_prob)}% HDI",
            ha="center",
            fontsize=12,
        )

        # Mode marker (appears in the legend).
        ax.scatter(
            params.mode,
            0,
            marker="v",
            color="darkorange",
            label=f"mode={pk.num_to_str(params.mode)}",
            alpha=0.6,
        )

    # Mean marker (appears in the legend).
    ax.scatter(
        params.mean,
        0,
        marker="v",
        color="black",
        label=f"mean={pk.num_to_str(params.mean)}",
        alpha=0.6,
    )

    ax.legend(
        loc="best",
        title=f"Beta(alpha={params.alpha}, beta={params.beta})",
    )

    ax.set_xlabel(r"$\theta$")
    ax.set_ylabel(r"$p(\theta \mid {}, {})$".format(params.alpha, params.beta))
    ax.set_xticks(np.arange(0, 1.1, 0.1))

    return ax
560
+
561
+
435
562
  def plot_contour(
436
563
  xyz_pts: XyzPoints,
437
564
  /,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes