pandas-plots 0.12.19__py3-none-any.whl → 0.12.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pandas_plots/hlp.py CHANGED
@@ -22,29 +22,34 @@ from PIL import Image
22
22
  URL_REGEX = r"^(?:http|ftp)s?://" # https://stackoverflow.com/a/1617386
23
23
 
24
24
 
25
- def mean_confidence_interval(df, confidence=0.95):
25
+ def mean_confidence_interval(df, confidence=0.95, use_median=False):
26
26
  """
27
- Calculate the mean and confidence interval of the input dataframe.
28
- source: https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
27
+ Calculate the mean or median and confidence interval of the input dataframe.
28
+ Source: https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
29
29
 
30
30
  Parameters:
31
31
  df (array-like): The input dataframe.
32
32
  confidence (float, optional): The confidence level for the interval. Defaults to 0.95.
33
+ use_median (bool, optional): If True, calculates median and confidence interval instead of mean. Defaults to False.
33
34
 
34
35
  Returns:
35
- tuple: A tuple containing the mean, interval, lower bound, and upper bound.
36
+ tuple: A tuple containing the central value (mean or median), interval, lower bound, and upper bound.
36
37
  """
37
38
  df = to_series(df)
38
39
  if df is None:
39
40
  return None
40
41
  a = 1.0 * np.array(df)
41
42
  n = len(a)
42
- mean, se = np.mean(a), scipy.stats.sem(a)
43
- # * calculate the margin of error for the confidence interval using the t-distribution with the specified confidence level.
44
- margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
45
- lower = mean - margin
46
- upper = mean + margin
47
- return mean, margin, lower, upper
43
+
44
+ if use_median:
45
+ median = np.median(a)
46
+ se = 1.253 * scipy.stats.sem(a) # Approximate standard error for median
47
+ margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
48
+ return median, margin, median - margin, median + margin
49
+ else:
50
+ mean, se = np.mean(a), scipy.stats.sem(a)
51
+ margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
52
+ return mean, margin, mean - margin, mean + margin
48
53
 
49
54
  # # * Alternative
50
55
  # # from statistics import NormalDist
@@ -522,33 +527,47 @@ def find_cols(all_cols: list[str], stubs: list[str] = None) -> list[str]:
522
527
  # * extend objects to enable chaining
523
528
  pd.DataFrame.find_cols = find_cols
524
529
 
525
- def add_measures_to_pyg_config(json_path: str, nodes: list[tuple[str, str]]) -> None:
530
+
531
+ def add_measures_to_pyg_config(json_path: str, nodes: list[tuple[str, str]] = [("cnt_tum", "count(distinct z_tum_id)")], strict: bool = False) -> None:
526
532
  """
527
- Reads a pygwalker json config file, adds new measures from given nodes if not already present, and writes back to the file.
533
+ Reads a pygwalker JSON config file, adds new measures from given nodes if not already present, and writes back to the file.
528
534
 
529
535
  Parameters
530
536
  ----------
531
537
  json_path : `str`
532
- The path to the pyg_json config file.
533
- nodes : `list[tuple[str, str]]`
534
- A list of tuples, where the first element in the tuple is the name of the measure and the second element is the SQL expression that defines the measure.
538
+ The path to the pygwalker JSON config file.
539
+ nodes : `list[tuple[str, str]]`, optional
540
+ A list of tuples, where the first element in the tuple is the name of the measure and the second element is the SQL expression that defines the measure. Default is `[('cnt_tum', 'count(distinct z_tum_id)')]`.
541
+ strict : `bool`, optional
542
+ If True, raises an error if the file does not exist or if JSON parsing fails. If False, the function exits silently in such cases. Default is False.
535
543
 
536
544
  Returns
537
545
  -------
538
546
  None
539
-
547
+
540
548
  Example
541
549
  -------
542
- `node = [("cnt_tum", "count(distinct z_tum_id)")]`
543
- """
550
+ default: `add_measures_to_pyg_config('config.json', [('cnt_tum', 'count(distinct z_tum_id)')], strict=True)`
544
551
 
545
- with open(json_path, "r", encoding="utf-8") as file:
546
- config = json.load(file)
552
+ usage: start pygwalker with an empty config file but a defined config path. Make changes on the chart and save the config file, then run this function again - the measures will be added.
553
+ """
554
+ if not os.path.exists(json_path):
555
+ if strict:
556
+ raise FileNotFoundError(f"File not found: {json_path}")
557
+ return
558
+
559
+ try:
560
+ with open(json_path, "r", encoding="utf-8") as file:
561
+ config = json.load(file)
562
+ except json.JSONDecodeError:
563
+ if strict:
564
+ raise
565
+ return
547
566
 
548
567
  for node in nodes:
549
568
  fid = uuid.uuid4().hex
550
569
 
551
- # Define the measure
570
+ # * Define the measure
552
571
  new_json_node = {
553
572
  "analyticType": "measure",
554
573
  "fid": f"{fid}",
@@ -563,13 +582,13 @@ def add_measures_to_pyg_config(json_path: str, nodes: list[tuple[str, str]]) ->
563
582
  }
564
583
  }
565
584
 
566
- # Get the measures list
567
- measures = config["config"][0]["encodings"]["measures"]
585
+ # * Get the measures list
586
+ measures = config.get("config", [{}])[0].get("encodings", {}).get("measures", [])
568
587
 
569
- # Ensure the measure is present
588
+ # * Ensure the measure is present
570
589
  if not any(measure.get("name") == node[0] for measure in measures):
571
590
  measures.append(new_json_node)
572
591
 
573
- # Write the updated JSON back to the file
592
+ # * Write the updated JSON back to the file
574
593
  with open(json_path, "w", encoding="utf-8") as file:
575
594
  json.dump(config, file, indent=2)
pandas_plots/pls.py CHANGED
@@ -500,6 +500,7 @@ def plot_bars(
500
500
  width: int = 1600,
501
501
  title: str = None,
502
502
  use_ci: bool = False,
503
+ ci_agg: Literal["mean", "median"] = "mean",
503
504
  precision: int = 0,
504
505
  renderer: Literal["png", "svg", None] = "png",
505
506
  png_path: Path | str = None,
@@ -569,9 +570,9 @@ def plot_bars(
569
570
  dropna=False,
570
571
  )
571
572
  .agg(
572
- mean=(col_name, "mean"),
573
+ mean=(col_name, ci_agg),
573
574
  # * retrieve margin from custom func
574
- margin=(col_name, lambda x: mean_confidence_interval(x)[1]),
575
+ margin=(col_name, lambda x: mean_confidence_interval(x, use_median = (ci_agg == "median"))[1]),
575
576
  )
576
577
  .reset_index()
577
578
  )
@@ -653,7 +654,7 @@ def plot_bars(
653
654
 
654
655
  # * title str n
655
656
  _title_str_n = (
656
- f", n={n_len:_} ({n:_})" if not use_ci else f", n={n_len:_})<br><sub>ci(95) on means<sub>"
657
+ f", n={n_len:_} ({n:_})" if not use_ci else f", n={n_len:_})<br><sub>ci(95) on {ci_agg}s<sub>"
657
658
  )
658
659
 
659
660
  # * title str na
@@ -965,6 +966,7 @@ def plot_box(
965
966
  violin: bool = False,
966
967
  x_min: float = None,
967
968
  x_max: float = None,
969
+ use_log: bool = False,
968
970
  png_path: Path | str = None,
969
971
  ) -> object:
970
972
  """
@@ -977,10 +979,13 @@ def plot_box(
977
979
  height: The height of the plot.
978
980
  width: The width of the plot.
979
981
  annotations: Whether to add annotations to the plot.
980
- violin: Use violin plot or not
981
- x_min: The minimum value for the x-axis scale (max and min must be set)
982
- x_max: The maximum value for the x-axis scale (max and min must be set)
983
- summary: Whether to add a summary table to the plot
982
+ summary: Whether to add a summary table to the plot.
983
+ caption: The caption for the plot.
984
+ title: The title of the plot.
985
+ violin: Use violin plot or not.
986
+ x_min: The minimum value for the x-axis scale (max and min must be set).
987
+ x_max: The maximum value for the x-axis scale (max and min must be set).
988
+ use_log: Use a logarithmic scale for the x-axis.
984
989
  png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
985
990
 
986
991
  Returns:
@@ -993,7 +998,7 @@ def plot_box(
993
998
  # * drop na to keep scipy sane
994
999
  n_ = len(ser)
995
1000
  ser.dropna(inplace=True)
996
- n = len(ser)
1001
+ # n = len(ser)
997
1002
 
998
1003
  # hack
999
1004
  median = ser.median()
@@ -1011,7 +1016,6 @@ def plot_box(
1011
1016
  lvl3 = height * 0.25
1012
1017
 
1013
1018
  caption = _set_caption(caption)
1014
-
1015
1019
  dict = {
1016
1020
  "data_frame": ser,
1017
1021
  "orientation": "h",
@@ -1020,7 +1024,9 @@ def plot_box(
1020
1024
  "width": width,
1021
1025
  "points": points,
1022
1026
  # 'box':True,
1023
- "title": f"{caption}[{ser.name}], n = {n_:_}({n:_})" if not title else title,
1027
+ "log_x": use_log, # * logarithmic scale, axis is always x
1028
+ # "notched": True,
1029
+ "title": f"{caption}[{ser.name}], n = {n_:_}" if not title else title,
1024
1030
  }
1025
1031
 
1026
1032
  fig = px.violin(**{**dict, "box": True}) if violin else px.box(**dict)
@@ -1119,6 +1125,8 @@ def plot_boxes(
1119
1125
  annotations: bool = True,
1120
1126
  summary: bool = True,
1121
1127
  title: str = None,
1128
+ use_log: bool = False,
1129
+ box_width: float = 0.5,
1122
1130
  png_path: Path | str = None,
1123
1131
  ) -> object:
1124
1132
  """
@@ -1133,6 +1141,7 @@ def plot_boxes(
1133
1141
  width (int): The width of the plot.
1134
1142
  annotations (bool): Whether to add annotations to the plot.
1135
1143
  summary (bool): Whether to add a summary to the plot.
1144
+ use_log (bool): Whether to use a logarithmic scale for the y-axis.
1136
1145
  png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
1137
1146
 
1138
1147
  Returns:
@@ -1170,11 +1179,14 @@ def plot_boxes(
1170
1179
  df,
1171
1180
  x=df.iloc[:, 0],
1172
1181
  y=df.iloc[:, 1],
1182
+ color=df.iloc[:, 0],
1173
1183
  template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
1174
1184
  orientation="v",
1175
1185
  height=height,
1176
1186
  width=width,
1177
1187
  points=points,
1188
+ log_y=use_log,
1189
+ # color_discrete_sequence=px.colors.qualitative.Plotly,
1178
1190
  title=(
1179
1191
  f"{caption}[{df.columns[0]}] on [{df.columns[1]}], n = {len(df):_.0f}"
1180
1192
  if not title
@@ -1245,6 +1257,9 @@ def plot_boxes(
1245
1257
 
1246
1258
  fig.update_xaxes(title_text=df.columns[0])
1247
1259
  fig.update_yaxes(title_text=df.columns[1])
1260
+ fig.update_layout(boxmode="group") # Ensures boxes are not too compressed
1261
+ fig.update_layout(showlegend=False)
1262
+ fig.update_traces(marker=dict(size=7), width=box_width) # Adjust width (default ~0.5)
1248
1263
 
1249
1264
  fig.show("png")
1250
1265
  if summary:
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pandas-plots
3
- Version: 0.12.19
3
+ Version: 0.12.21
4
4
  Summary: A collection of helper for table handling and visualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
7
7
  Author-email: dexterDSDo@googlemail.com
8
- License: "MIT"
8
+ License: MIT
9
9
  Project-URL: Documentation, https://github.com/smeisegeier/pandas-plots
10
10
  Project-URL: Source Code, https://github.com/smeisegeier/pandas-plots
11
11
  Project-URL: Bug Tracker, https://github.com/smeisegeier/pandas-plots/issues
@@ -0,0 +1,11 @@
1
+ pandas_plots/hlp.py,sha256=uq-uXKgb9DtsrW_2cBmU-tf_akfEAcvPW2ma6YmKx7Y,20789
2
+ pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
3
+ pandas_plots/pls.py,sha256=f890q1wYIGecRyzGxsMcmGPkE8v4JZmqXU56VWzC2ao,49029
4
+ pandas_plots/tbl.py,sha256=LxMKJh4qkGuQZ1DdCZIq1tMS26F6elsqbe_uabvQx4E,32535
5
+ pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
6
+ pandas_plots-0.12.21.dist-info/licenses/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
7
+ pandas_plots-0.12.21.dist-info/METADATA,sha256=UM2wZnlloV1PCL2wYPwIFUCIRE4zoVagON1AgqrSsxU,7564
8
+ pandas_plots-0.12.21.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
9
+ pandas_plots-0.12.21.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
10
+ pandas_plots-0.12.21.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
11
+ pandas_plots-0.12.21.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (77.0.1)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- pandas_plots/hlp.py,sha256=hlz7kKe6iDsz6Ov5YadX9zT5E01DHy7gHdVPgG8P7nQ,19656
2
- pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
3
- pandas_plots/pls.py,sha256=S9hKQTPp8XIL9RmzvfeSQE4t2jm18bG4bejwcd-dJF4,48236
4
- pandas_plots/tbl.py,sha256=LxMKJh4qkGuQZ1DdCZIq1tMS26F6elsqbe_uabvQx4E,32535
5
- pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
6
- pandas_plots-0.12.19.dist-info/licenses/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
7
- pandas_plots-0.12.19.dist-info/METADATA,sha256=qvKq6iuJK8dg9hXtwz8f0djqvtjdXmCQBbly_opRf40,7566
8
- pandas_plots-0.12.19.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
9
- pandas_plots-0.12.19.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
10
- pandas_plots-0.12.19.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
11
- pandas_plots-0.12.19.dist-info/RECORD,,