onekit 3.0.0__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onekit-3.0.0 → onekit-3.1.0}/PKG-INFO +1 -1
- {onekit-3.0.0 → onekit-3.1.0}/pyproject.toml +2 -2
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/numpykit.py +20 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/pythonkit.py +10 -3
- onekit-3.1.0/src/onekit/scipykit.py +126 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/vizkit.py +127 -0
- {onekit-3.0.0 → onekit-3.1.0}/LICENSE +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/README.md +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/__init__.py +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/dekit.py +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/exception.py +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/mathkit.py +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/optfunckit.py +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/pandaskit.py +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/sklearnkit.py +0 -0
- {onekit-3.0.0 → onekit-3.1.0}/src/onekit/sparkkit.py +0 -0
|
@@ -22,7 +22,7 @@ requires-python = ">=3.11"
|
|
|
22
22
|
dependencies = []
|
|
23
23
|
|
|
24
24
|
[tool.poetry]
|
|
25
|
-
version = "3.0.0"
|
|
25
|
+
version = "3.1.0"
|
|
26
26
|
|
|
27
27
|
[project.optional-dependencies]
|
|
28
28
|
base = [
|
|
@@ -58,7 +58,7 @@ sphinx-copybutton = "^0.5.2"
|
|
|
58
58
|
time-machine = "^2.16.0"
|
|
59
59
|
|
|
60
60
|
[tool.poetry.group.packaging.dependencies]
|
|
61
|
-
python-semantic-release = "
|
|
61
|
+
python-semantic-release = "8.3.0"
|
|
62
62
|
|
|
63
63
|
[tool.black]
|
|
64
64
|
line-length = 88
|
|
@@ -8,6 +8,7 @@ from onekit import mathkit as mk
|
|
|
8
8
|
__all__ = (
|
|
9
9
|
"check_random_state",
|
|
10
10
|
"check_vector",
|
|
11
|
+
"create_boolean_array",
|
|
11
12
|
"digitscale",
|
|
12
13
|
"stderr",
|
|
13
14
|
)
|
|
@@ -91,6 +92,25 @@ def check_vector(x: ArrayLike, /, *, n_min: int = 1, n_max: int = np.inf) -> Vec
|
|
|
91
92
|
return x
|
|
92
93
|
|
|
93
94
|
|
|
95
|
+
# noinspection PyTypeChecker
|
|
96
|
+
def create_boolean_array(data: ArrayLike, pos_label: int | str) -> np.ndarray:
|
|
97
|
+
"""Returns a boolean array indicating positions of pos_label in input data.
|
|
98
|
+
|
|
99
|
+
Examples
|
|
100
|
+
--------
|
|
101
|
+
>>> from onekit import numpykit as npk
|
|
102
|
+
>>> data = [0, 1, 2, 1, 0, 1]
|
|
103
|
+
>>> npk.create_boolean_array(data, pos_label=1)
|
|
104
|
+
array([False, True, False, True, False, True])
|
|
105
|
+
|
|
106
|
+
>>> data = ["cat", "dog", "cat", "bird", "cat", "dog"]
|
|
107
|
+
>>> npk.create_boolean_array(data, pos_label="dog")
|
|
108
|
+
array([False, True, False, False, False, True])
|
|
109
|
+
"""
|
|
110
|
+
data_array = np.asarray(data)
|
|
111
|
+
return data_array == pos_label
|
|
112
|
+
|
|
113
|
+
|
|
94
114
|
def digitscale(x: ArrayLike, /, *, kind: str = "log") -> np.ndarray:
|
|
95
115
|
"""NumPy version of digitscale.
|
|
96
116
|
|
|
@@ -273,8 +273,8 @@ def coinflip(bias: float, /, *, seed: Seed = None) -> bool:
|
|
|
273
273
|
return rng.random() < bias
|
|
274
274
|
|
|
275
275
|
|
|
276
|
-
def concat_strings(sep: str, /, *strings: str | Iterable[str]) -> str:
|
|
277
|
-
"""Concatenate strings.
|
|
276
|
+
def concat_strings(sep: str, /, *strings: str | None | Iterable[str | None]) -> str:
|
|
277
|
+
"""Concatenate strings, excluding None values.
|
|
278
278
|
|
|
279
279
|
Examples
|
|
280
280
|
--------
|
|
@@ -296,7 +296,14 @@ def concat_strings(sep: str, /, *strings: str | Iterable[str]) -> str:
|
|
|
296
296
|
>>> list(map(ws_concat, [["Hello", "World"], ["Hi", "there"]]))
|
|
297
297
|
['Hello World', 'Hi there']
|
|
298
298
|
"""
|
|
299
|
-
return sep.join(
|
|
299
|
+
return sep.join(
|
|
300
|
+
toolz.pipe(
|
|
301
|
+
strings,
|
|
302
|
+
flatten,
|
|
303
|
+
curried.filter(lambda x: x is not None),
|
|
304
|
+
curried.map(str),
|
|
305
|
+
)
|
|
306
|
+
)
|
|
300
307
|
|
|
301
308
|
|
|
302
309
|
def contrast_sets(x: set, y: set, /, *, n: int = 3) -> dict:
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import operator
|
|
2
|
+
from typing import (
|
|
3
|
+
Iterable,
|
|
4
|
+
NamedTuple,
|
|
5
|
+
)
|
|
6
|
+
|
|
7
|
+
from scipy import (
|
|
8
|
+
optimize,
|
|
9
|
+
stats,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from onekit import numpykit as npk
|
|
13
|
+
from onekit import pythonkit as pk
|
|
14
|
+
|
|
15
|
+
__all__ = (
|
|
16
|
+
"BetaParams",
|
|
17
|
+
"compute_beta_posterior",
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BetaParams(NamedTuple):
|
|
22
|
+
"""Represents the parameters of a Beta distribution."""
|
|
23
|
+
|
|
24
|
+
alpha: int | float = 1
|
|
25
|
+
beta: int | float = 1
|
|
26
|
+
|
|
27
|
+
@property
|
|
28
|
+
def mean(self) -> float:
|
|
29
|
+
"""Compute the mean of the Beta distribution."""
|
|
30
|
+
return self.alpha / (self.alpha + self.beta)
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def mode(self) -> float | None:
|
|
34
|
+
"""Compute the mode of the Beta distribution.
|
|
35
|
+
|
|
36
|
+
Note that the mode is undefined for alpha <= 1 or beta <= 1.
|
|
37
|
+
"""
|
|
38
|
+
if self.alpha > 1 and self.beta > 1:
|
|
39
|
+
return (self.alpha - 1) / (self.alpha + self.beta - 2)
|
|
40
|
+
|
|
41
|
+
def hdi(self, hdi_prob: float = 0.95) -> tuple[float, float] | None:
|
|
42
|
+
"""Compute the highest density interval (HDI) of the Beta distribution.
|
|
43
|
+
|
|
44
|
+
Note that the HDI is not computed for alpha <= 1 or beta <= 1.
|
|
45
|
+
"""
|
|
46
|
+
if self.alpha > 1 and self.beta > 1:
|
|
47
|
+
beta_dist = stats.beta(self.alpha, self.beta)
|
|
48
|
+
tail_prob = 1 - hdi_prob
|
|
49
|
+
|
|
50
|
+
def interval_width(x: float) -> float:
|
|
51
|
+
return beta_dist.ppf(hdi_prob + x) - beta_dist.ppf(x)
|
|
52
|
+
|
|
53
|
+
hdi_tail_prob = operator.getitem(
|
|
54
|
+
optimize.fmin(interval_width, tail_prob, ftol=1e-12, disp=False),
|
|
55
|
+
0,
|
|
56
|
+
)
|
|
57
|
+
hdi_endpoints = beta_dist.ppf([hdi_tail_prob, hdi_prob + hdi_tail_prob])
|
|
58
|
+
return hdi_endpoints[0], hdi_endpoints[1]
|
|
59
|
+
|
|
60
|
+
def get_summary(self, hdi_prob: float = 0.95) -> str:
|
|
61
|
+
"""Compute summary statistics of the Beta distribution."""
|
|
62
|
+
mode = self.mode
|
|
63
|
+
mode_info = f"mode={pk.num_to_str(mode)}" if mode is not None else None
|
|
64
|
+
|
|
65
|
+
hdi_info = None
|
|
66
|
+
hdi_endpoints = self.hdi(hdi_prob)
|
|
67
|
+
if hdi_endpoints is not None:
|
|
68
|
+
hdi_pct = pk.num_to_str(100 * hdi_prob)
|
|
69
|
+
hdi_lower_endpoint, hdi_upper_endpoint = map(pk.num_to_str, hdi_endpoints)
|
|
70
|
+
hdi_info = f"{hdi_pct}%-HDI=[{hdi_lower_endpoint}, {hdi_upper_endpoint}]"
|
|
71
|
+
|
|
72
|
+
return pk.concat_strings(
|
|
73
|
+
" ",
|
|
74
|
+
f"{self} ->",
|
|
75
|
+
f"mean={pk.num_to_str(self.mean)}",
|
|
76
|
+
mode_info,
|
|
77
|
+
hdi_info,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def compute_beta_posterior(
    data: Iterable[int | str],
    prior: BetaParams | None = None,
    pos_label: int | str = 1,
) -> BetaParams:
    """Update Beta prior with observed binomial data to compute posterior.

    This function applies Bayesian inference to update the parameters of a Beta
    distribution, given observed binomial data. The Beta distribution is commonly used
    as a prior in binomial proportion estimation due to its conjugacy, simplifying the
    calculation of the posterior.

    Parameters
    ----------
    data : Iterable of int or str
        Observed outcomes; one-shot iterables such as generators are accepted.
    prior : BetaParams, optional
        Prior parameters; ``BetaParams()`` is used when omitted.
    pos_label : int or str, default=1
        Value in ``data`` that counts as a success.

    Returns
    -------
    BetaParams
        Posterior with ``alpha = prior.alpha + successes`` and
        ``beta = prior.beta + failures``.

    Examples
    --------
    >>> from onekit import scipykit as sck
    >>> from onekit.scipykit import BetaParams
    >>> data = [1, 0, 1, 1, 0]
    >>> posterior = sck.compute_beta_posterior(data)
    >>> posterior.get_summary()
    'BetaParams(alpha=4, beta=3) -> mean=0.571429 mode=0.6 95%-HDI=[0.238706, 0.895169]'

    >>> data = ["head", "tail", "head", "head", "tail", "head", "head", "tail"]
    >>> prior = BetaParams(alpha=2, beta=2)
    >>> posterior = sck.compute_beta_posterior(data, prior, pos_label="head")
    >>> posterior.get_summary()
    'BetaParams(alpha=7, beta=5) -> mean=0.583333 mode=0.6 95%-HDI=[0.318232, 0.841428]'

    >>> data = [1, 0, 1, 1, 0]
    >>> prior = BetaParams(alpha=1, beta=1)
    >>> posterior1 = sck.compute_beta_posterior(data, prior)
    >>> posterior1.get_summary()
    'BetaParams(alpha=4, beta=3) -> mean=0.571429 mode=0.6 95%-HDI=[0.238706, 0.895169]'
    >>> more_data = [1, 0, 1, 0, 1]
    >>> posterior2 = sck.compute_beta_posterior(more_data, prior=posterior1)
    >>> posterior2.get_summary()
    'BetaParams(alpha=7, beta=5) -> mean=0.583333 mode=0.6 95%-HDI=[0.318232, 0.841428]'
    """
    if prior is None:
        prior = BetaParams()

    # Generators and other unsized iterables would be wrapped by NumPy into a
    # 0-d object array, so turn them into a list before the comparison.
    if not hasattr(data, "__len__"):
        data = list(data)

    y = npk.create_boolean_array(data, pos_label)
    # Cast to built-in int so the posterior does not carry NumPy scalars, whose
    # repr (e.g. ``np.int64(4)``) would leak into ``BetaParams.get_summary``.
    num_successes = int(y.sum())
    num_trials = len(y)
    return BetaParams(
        alpha=prior.alpha + num_successes,
        beta=prior.beta + num_trials - num_successes,
    )
|
|
@@ -11,9 +11,13 @@ from matplotlib.axes import Axes
|
|
|
11
11
|
from matplotlib.figure import Figure
|
|
12
12
|
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
|
13
13
|
from mpl_toolkits.mplot3d.axes3d import Axes3D
|
|
14
|
+
from scipy import (
|
|
15
|
+
stats,
|
|
16
|
+
)
|
|
14
17
|
|
|
15
18
|
from onekit import numpykit as npk
|
|
16
19
|
from onekit import pythonkit as pk
|
|
20
|
+
from onekit.scipykit import BetaParams
|
|
17
21
|
|
|
18
22
|
ArrayLike = npt.ArrayLike
|
|
19
23
|
|
|
@@ -23,6 +27,7 @@ __all__ = (
|
|
|
23
27
|
"create_xy_points",
|
|
24
28
|
"create_xyz_points",
|
|
25
29
|
"discrete_cmap",
|
|
30
|
+
"plot_beta_distribution",
|
|
26
31
|
"plot_contour",
|
|
27
32
|
"plot_digitscale",
|
|
28
33
|
"plot_line",
|
|
@@ -432,6 +437,128 @@ def discrete_cmap(
|
|
|
432
437
|
return [cmap(i) for i in np.linspace(lower_bound, upper_bound, num=n)]
|
|
433
438
|
|
|
434
439
|
|
|
440
|
+
def plot_beta_distribution(
    alpha: int | float,
    beta: int | float,
    hdi_prob: float = 0.95,
    n_xvalues: int = 1001,
    ax=None,
) -> Axes:
    """Plot Beta distribution with HDI.

    Draws the density over [0, 1], shades the highest density interval when
    ``BetaParams.hdi`` provides one (otherwise shades the whole curve), and
    marks the mode (if available) and the mean on the x-axis.

    See Also
    --------
    onekit.scipykit.BetaParams : Beta parameters

    Examples
    --------
    >>> from onekit import vizkit as vk
    >>> vk.plot_beta_distribution(alpha=2, beta=2)  # doctest: +SKIP
    """
    if not ax:
        ax = plt.gca()
    params = BetaParams(alpha, beta)

    def beta_density(x: float) -> float:
        return stats.beta.pdf(x, params.alpha, params.beta)

    # Density curve.
    curve = FunctionPlotter(
        beta_density,
        [(0, 1)],
        n_xvalues=n_xvalues,
        kws_plot={"zorder": 1, "alpha": 0.7},
    )
    _, ax, _ = curve.plot(ax=ax)
    points = curve._xyz_pts
    x = points.x
    y = points.y.ravel()

    # Shaded area: restricted to the HDI when one is available.
    interval = params.hdi(hdi_prob)
    shade_kws = {"color": "skyblue", "alpha": 0.5, "zorder": 0}
    if interval is not None:
        lower, upper = interval
        shade_kws["where"] = (x >= lower) & (x <= upper)
    ax.fill_between(x, y, **shade_kws)

    if interval is not None:
        pad = 1.2
        lower_density = beta_density(lower)
        upper_density = beta_density(upper)

        # Label each endpoint and drop a dashed line down to the x-axis.
        for endpoint, density in ((lower, lower_density), (upper, upper_density)):
            ax.text(endpoint, pad * density, f"{endpoint:.3f}", ha="center")
            ax.plot(
                [endpoint, endpoint],
                [0, density],
                color="black",
                linestyle="--",
            )

        # Segment connecting the two endpoints on the density curve.
        ax.plot(
            [lower, upper],
            [lower_density, upper_density],
            color="black",
            linewidth=2,
        )

        # HDI caption centered above the segment.
        caption = f"{pk.num_to_str(100 * hdi_prob)}% HDI"
        center = float(np.mean([lower, upper]))
        ax.text(
            center,
            1.25 * pad * lower_density,
            caption,
            ha="center",
            fontsize=12,
        )

        # Mode marker (legend entry).
        mode = params.mode
        ax.scatter(
            mode,
            0,
            marker="v",
            color="darkorange",
            label=f"mode={pk.num_to_str(mode)}",
            alpha=0.6,
        )

    # Mean marker (legend entry).
    mean = params.mean
    ax.scatter(
        mean,
        0,
        marker="v",
        color="black",
        label=f"mean={pk.num_to_str(mean)}",
        alpha=0.6,
    )

    ax.legend(
        loc="best",
        title=f"Beta(alpha={params.alpha}, beta={params.beta})",
    )
    ax.set_xlabel(r"$\theta$")
    ax.set_ylabel(rf"$p(\theta \mid {params.alpha}, {params.beta})$")
    ax.set_xticks(np.arange(0, 1.1, 0.1))

    return ax
|
|
560
|
+
|
|
561
|
+
|
|
435
562
|
def plot_contour(
|
|
436
563
|
xyz_pts: XyzPoints,
|
|
437
564
|
/,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|