pandas-plots 0.15.10__tar.gz → 0.15.13__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/PKG-INFO +1 -1
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/pyproject.toml +1 -1
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/src/pandas_plots/hlp.py +23 -28
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/src/pandas_plots/tbl.py +2 -3
- pandas_plots-0.15.10/src/pandas_plots/.DS_Store +0 -0
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/README.md +0 -0
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/src/pandas_plots/__init__.py +0 -0
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/src/pandas_plots/pls.py +0 -0
- {pandas_plots-0.15.10 → pandas_plots-0.15.13}/src/pandas_plots/ven.py +0 -0
@@ -21,49 +21,44 @@ from PIL import Image
|
|
21
21
|
|
22
22
|
URL_REGEX = r"^(?:http|ftp)s?://" # https://stackoverflow.com/a/1617386
|
23
23
|
|
24
|
-
|
24
|
+
|
25
|
+
def mean_confidence_interval(df, confidence=0.95, use_median=False):
|
25
26
|
"""
|
26
|
-
Calculate the mean or median and confidence interval.
|
27
|
-
|
27
|
+
Calculate the mean or median and confidence interval of the input dataframe.
|
28
|
+
Source: https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
|
28
29
|
|
29
30
|
Parameters:
|
30
|
-
|
31
|
+
df (array-like): The input dataframe.
|
31
32
|
confidence (float, optional): The confidence level for the interval. Defaults to 0.95.
|
32
|
-
use_median (bool, optional): If True, calculates median and
|
33
|
-
n_bootstraps (int, optional): Number of bootstrap samples for median CI. Only used if use_median is True.
|
33
|
+
use_median (bool, optional): If True, calculates median and confidence interval instead of mean. Defaults to False.
|
34
34
|
|
35
35
|
Returns:
|
36
|
-
tuple: A tuple containing the central value (mean or median),
|
36
|
+
tuple: A tuple containing the central value (mean or median), interval, lower bound, and upper bound.
|
37
37
|
"""
|
38
|
-
|
39
|
-
if
|
40
|
-
return
|
41
|
-
a = 1.0 * np.array(
|
38
|
+
df = to_series(df)
|
39
|
+
if df is None:
|
40
|
+
return None
|
41
|
+
a = 1.0 * np.array(df)
|
42
42
|
n = len(a)
|
43
43
|
|
44
44
|
if use_median:
|
45
|
-
if n < 2: # Cannot bootstrap with n < 2
|
46
|
-
return np.median(a), np.nan, np.nan, np.nan
|
47
|
-
|
48
|
-
bootstrapped_medians = []
|
49
|
-
for _ in range(n_bootstraps):
|
50
|
-
sample = np.random.choice(a, size=n, replace=True)
|
51
|
-
bootstrapped_medians.append(np.median(sample))
|
52
|
-
|
53
45
|
median = np.median(a)
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
margin = (upper_bound - lower_bound) / 2 # Simple approximation for margin based on interval width
|
58
|
-
return median, margin, lower_bound, upper_bound
|
46
|
+
se = 1.253 * scipy.stats.sem(a) # Approximate standard error for median
|
47
|
+
margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
|
48
|
+
return median, margin, median - margin, median + margin
|
59
49
|
else:
|
60
|
-
mean = np.mean(a)
|
61
|
-
if n <= 1:
|
62
|
-
return mean, np.nan, np.nan, np.nan
|
63
|
-
se = scipy.stats.sem(a)
|
50
|
+
mean, se = np.mean(a), scipy.stats.sem(a)
|
64
51
|
margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
|
65
52
|
return mean, margin, mean - margin, mean + margin
|
66
53
|
|
54
|
+
# # * Alternative
|
55
|
+
# # from statistics import NormalDist
|
56
|
+
# def confidence_interval(data, confidence=0.95):
|
57
|
+
# dist = NormalDist.from_samples(data)
|
58
|
+
# z = NormalDist().inv_cdf((1 + confidence) / 2.)
|
59
|
+
# h = dist.stdev * z / ((len(data) - 1) ** .5)
|
60
|
+
# return dist.mean - h, dist.mean + h
|
61
|
+
|
67
62
|
|
68
63
|
def to_series(df) -> pd.Series | None:
|
69
64
|
"""
|
@@ -112,7 +112,7 @@ def describe_df(
|
|
112
112
|
df=df,
|
113
113
|
caption="dataframe",
|
114
114
|
use_plot=True,
|
115
|
-
renderer=
|
115
|
+
renderer=None,
|
116
116
|
template="plotly",
|
117
117
|
fig_cols=3,
|
118
118
|
fig_offset=None,
|
@@ -264,7 +264,7 @@ def describe_df(
|
|
264
264
|
)
|
265
265
|
|
266
266
|
fig.show(
|
267
|
-
renderer,
|
267
|
+
renderer=renderer or os.getenv("RENDERER"),
|
268
268
|
width=fig_width * fig_cols, # <-- Set width here
|
269
269
|
height=fig_rowheight * fig_rows, # <-- Set height here
|
270
270
|
)
|
@@ -273,7 +273,6 @@ def describe_df(
|
|
273
273
|
import missingno as msno
|
274
274
|
msno.matrix(df_, figsize=missing_figsize)
|
275
275
|
|
276
|
-
|
277
276
|
def pivot_df(
|
278
277
|
df: pd.DataFrame,
|
279
278
|
dropna: bool = False,
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|