pandas-plots 0.15.10__tar.gz → 0.15.13__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pandas-plots
3
- Version: 0.15.10
3
+ Version: 0.15.13
4
4
  Summary: A collection of helper for table handling and visualization
5
5
  Keywords: tables,pivot,plotly,venn,plot,vizualization
6
6
  Author: smeisegeier
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pandas-plots"
3
- version = "0.15.10"
3
+ version = "0.15.13"
4
4
  description = "A collection of helper for table handling and visualization"
5
5
  long_description = "file: README.md"
6
6
  long_description_content_type = "text/markdown"
@@ -21,49 +21,44 @@ from PIL import Image
21
21
 
22
22
  URL_REGEX = r"^(?:http|ftp)s?://" # https://stackoverflow.com/a/1617386
23
23
 
24
- def mean_confidence_interval(data, confidence=0.95, use_median=False, n_bootstraps=1000):
24
+
25
+ def mean_confidence_interval(df, confidence=0.95, use_median=False):
25
26
  """
26
- Calculate the mean or median and confidence interval.
27
- For median, uses bootstrapping for a more robust confidence interval.
27
+ Calculate the mean or median and confidence interval of the input dataframe.
28
+ Source: https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
28
29
 
29
30
  Parameters:
30
- data (array-like): The input data.
31
+ df (array-like): The input dataframe.
31
32
  confidence (float, optional): The confidence level for the interval. Defaults to 0.95.
32
- use_median (bool, optional): If True, calculates median and its confidence interval. Defaults to False.
33
- n_bootstraps (int, optional): Number of bootstrap samples for median CI. Only used if use_median is True.
33
+ use_median (bool, optional): If True, calculates median and confidence interval instead of mean. Defaults to False.
34
34
 
35
35
  Returns:
36
- tuple: A tuple containing the central value (mean or median), margin of error, lower bound, and upper bound.
36
+ tuple: A tuple containing the central value (mean or median), interval, lower bound, and upper bound.
37
37
  """
38
- data = to_series(data)
39
- if data is None or len(data) == 0:
40
- return np.nan, np.nan, np.nan, np.nan
41
- a = 1.0 * np.array(data)
38
+ df = to_series(df)
39
+ if df is None:
40
+ return None
41
+ a = 1.0 * np.array(df)
42
42
  n = len(a)
43
43
 
44
44
  if use_median:
45
- if n < 2: # Cannot bootstrap with n < 2
46
- return np.median(a), np.nan, np.nan, np.nan
47
-
48
- bootstrapped_medians = []
49
- for _ in range(n_bootstraps):
50
- sample = np.random.choice(a, size=n, replace=True)
51
- bootstrapped_medians.append(np.median(sample))
52
-
53
45
  median = np.median(a)
54
- alpha = (1 - confidence) / 2
55
- lower_bound = np.percentile(bootstrapped_medians, alpha * 100)
56
- upper_bound = np.percentile(bootstrapped_medians, (1 - alpha) * 100)
57
- margin = (upper_bound - lower_bound) / 2 # Simple approximation for margin based on interval width
58
- return median, margin, lower_bound, upper_bound
46
+ se = 1.253 * scipy.stats.sem(a) # Approximate standard error for median
47
+ margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
48
+ return median, margin, median - margin, median + margin
59
49
  else:
60
- mean = np.mean(a)
61
- if n <= 1:
62
- return mean, np.nan, np.nan, np.nan
63
- se = scipy.stats.sem(a)
50
+ mean, se = np.mean(a), scipy.stats.sem(a)
64
51
  margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
65
52
  return mean, margin, mean - margin, mean + margin
66
53
 
54
+ # # * Alternative
55
+ # # from statistics import NormalDist
56
+ # def confidence_interval(data, confidence=0.95):
57
+ # dist = NormalDist.from_samples(data)
58
+ # z = NormalDist().inv_cdf((1 + confidence) / 2.)
59
+ # h = dist.stdev * z / ((len(data) - 1) ** .5)
60
+ # return dist.mean - h, dist.mean + h
61
+
67
62
 
68
63
  def to_series(df) -> pd.Series | None:
69
64
  """
@@ -112,7 +112,7 @@ def describe_df(
112
112
  df=df,
113
113
  caption="dataframe",
114
114
  use_plot=True,
115
- renderer="png",
115
+ renderer=None,
116
116
  template="plotly",
117
117
  fig_cols=3,
118
118
  fig_offset=None,
@@ -264,7 +264,7 @@ def describe_df(
264
264
  )
265
265
 
266
266
  fig.show(
267
- renderer,
267
+ renderer=renderer or os.getenv("RENDERER"),
268
268
  width=fig_width * fig_cols, # <-- Set width here
269
269
  height=fig_rowheight * fig_rows, # <-- Set height here
270
270
  )
@@ -273,7 +273,6 @@ def describe_df(
273
273
  import missingno as msno
274
274
  msno.matrix(df_, figsize=missing_figsize)
275
275
 
276
-
277
276
  def pivot_df(
278
277
  df: pd.DataFrame,
279
278
  dropna: bool = False,
File without changes