pandas-plots 0.15.2__tar.gz → 0.15.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/PKG-INFO +2 -2
  2. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/pyproject.toml +2 -2
  3. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/pandas_plots/pls.py +28 -11
  4. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/pandas_plots/tbl.py +3 -1
  5. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/test.ipynb +1548 -1205
  6. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/uv.lock +1888 -1698
  7. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/.gitignore +0 -0
  8. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/.python-version +0 -0
  9. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/LICENSE +0 -0
  10. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/README.md +0 -0
  11. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/img/2024-02-13-00-40-27.png +0 -0
  12. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/img/2024-02-14-20-49-00.png +0 -0
  13. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/img/2024-02-19-20-49-52.png +0 -0
  14. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/img/2024-03-02-17-33-43.png +0 -0
  15. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/img/2024-03-24-09-59-32.png +0 -0
  16. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/assets/Rplots.pdf +0 -0
  17. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/assets/dsich.csv +0 -0
  18. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/assets/facets.csv +0 -0
  19. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/pandas_plots/__init__.py +0 -0
  20. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/pandas_plots/hlp.py +0 -0
  21. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/pandas_plots/ven.py +0 -0
  22. {pandas_plots-0.15.2 → pandas_plots-0.15.4}/src/test.r +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pandas-plots
3
- Version: 0.15.2
3
+ Version: 0.15.4
4
4
  Summary: A collection of helper for table handling and visualization
5
5
  Project-URL: Homepage, https://github.com/smeisegeier/pandas-plots
6
6
  Project-URL: Repository, https://github.com/smeisegeier/pandas-plots
@@ -16,7 +16,7 @@ Classifier: Programming Language :: Python :: 3
16
16
  Classifier: Programming Language :: Python :: 3.12
17
17
  Classifier: Topic :: Scientific/Engineering
18
18
  Requires-Python: >=3.10
19
- Requires-Dist: connection-helper>=0.11.2
19
+ Requires-Dist: connection-helper>=0.12
20
20
  Requires-Dist: dataframe-image>=0.2.6
21
21
  Requires-Dist: duckdb>=1.3.0
22
22
  Requires-Dist: jinja2>=3.1.4
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pandas-plots"
3
- version = "0.15.2"
3
+ version = "0.15.4"
4
4
  description = "A collection of helper for table handling and visualization"
5
5
  long_description = "file: README.md"
6
6
  long_description_content_type = "text/markdown"
@@ -35,7 +35,7 @@ dependencies = [
35
35
  "duckdb>=1.3.0",
36
36
  "nbformat>=4.2.0",
37
37
  "dataframe_image>=0.2.6",
38
- "connection-helper>=0.11.2",
38
+ "connection-helper>=0.12",
39
39
  ]
40
40
 
41
41
  [project.urls]
@@ -375,19 +375,24 @@ def plot_stacked_bars(
375
375
  caption = _set_caption(caption)
376
376
 
377
377
  # * after grouping add cols for pct and formatting
378
- df["cnt_pct_all_only"] = df["value"].apply(lambda x: f"{(x / n) * 100:.{precision}f}%")
379
- df["cnt_pct_bar_only"] = (df["value"] / bar_totals * 100).apply(lambda x: f"{x:.{precision}f}%")
378
+ df["cnt_pct_all_only"] = (df["value"] / n * 100).apply(lambda x: f"{(x):.{precision}f}%")
379
+ df["cnt_pct_bar_only"] = (df["value"] / bar_totals * 100).apply(lambda x: f"{(x):.{precision}f}%")
380
380
 
381
381
  # * format output
382
382
  df["cnt_str"] = df["value"].apply(lambda x: f"{x:_.{precision}f}")
383
383
 
384
384
  divider2 = "<br>" if orientation == "v" else " "
385
385
 
386
+ # Modify this section
386
387
  df["cnt_pct_all_str"] = df.apply(
387
- lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_all_only']})", axis=1
388
+ lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_all_only']})"
389
+ if (row["value"] / n * 100) >= 5 else row["cnt_str"],
390
+ axis=1
388
391
  )
389
392
  df["cnt_pct_bar_str"] = df.apply(
390
- lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_bar_only']})", axis=1
393
+ lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_bar_only']})"
394
+ if (row["value"] / bar_totals.loc[row.name] * 100) >= 5 else row["cnt_str"],
395
+ axis=1
391
396
  )
392
397
 
393
398
  text_to_show = "cnt_str"
@@ -564,13 +569,21 @@ def plot_bars(
564
569
 
565
570
  # * if df, check if valid
566
571
  if isinstance(df_in, pd.DataFrame):
567
- if len(df_in.columns) != 2:
568
- print("❌ df must have exactly 2 columns")
569
- return
570
- elif not (df_in.iloc[:, 0].dtype.kind in ["O", "b"]) or not (
571
- df_in.iloc[:, 1].dtype.kind in ["i", "f"]
572
- ):
573
- print("❌ df must have string and numeric columns (in that order).")
572
+ if len(df_in.columns) == 1:
573
+ if not (df_in.iloc[:, 0].dtype.kind in ["O", "b"]):
574
+ print("❌ df must have 1 column of object or bool type.")
575
+ return
576
+ else:
577
+ df_in = df_in.value_counts(dropna=dropna).to_frame().reset_index()
578
+ use_ci = False
579
+ elif len(df_in.columns) == 2:
580
+ if not (df_in.iloc[:, 0].dtype.kind in ["O", "b"]) or not (
581
+ df_in.iloc[:, 1].dtype.kind in ["i", "f"]
582
+ ):
583
+ print("❌ df must have string and numeric columns (in that order).")
584
+ return
585
+ else:
586
+ print("❌ df must have exactly 1 or 2 columns")
574
587
  return
575
588
  else:
576
589
  print("❌ input must be series or dataframe.")
@@ -1194,6 +1207,10 @@ def plot_boxes(
1194
1207
  xlvl1 = -50
1195
1208
  xlvl2 = 0
1196
1209
  xlvl3 = 50
1210
+
1211
+ # * type of col0 must be str, not object. otherwise px.box will fail since sorting will fail
1212
+ if pd.api.types.is_object_dtype(df.iloc[:, 0]):
1213
+ df.iloc[:, 0] = df.iloc[:, 0].astype(str)
1197
1214
 
1198
1215
  # * unique items
1199
1216
  # Sort the unique items alphabetically
@@ -75,6 +75,7 @@ def describe_df(
75
75
  top_n_uniques: int = 5,
76
76
  top_n_chars_in_index: int = 0,
77
77
  top_n_chars_in_columns: int = 0,
78
+ missing_figsize: tuple[int, int] = (26, 6),
78
79
  ):
79
80
  """
80
81
  This function takes a pandas DataFrame and a caption as input parameters and prints out the caption as a styled header, followed by the shape of the DataFrame and the list of column names. For each column, it prints out the column name, the number of unique values, and the column data type. If the column is a numeric column with more than 100 unique values, it also prints out the minimum, mean, maximum, and sum values. Otherwise, it prints out the first 100 unique values of the column.
@@ -94,6 +95,7 @@ def describe_df(
94
95
  top_n_uniques (int): number of uniques to display
95
96
  top_n_chars_in_index (int): number of characters to display on plot axis
96
97
  top_n_chars_in_columns (int): number of characters to display on plot axis. If set, minimum is 10.
98
+ missing_figsize (tuple[int, int]): figsize for missing plot (default (26, 6))
97
99
 
98
100
  usage:
99
101
  describe_df(
@@ -252,7 +254,7 @@ def describe_df(
252
254
 
253
255
  if use_missing:
254
256
  import missingno as msno
255
- msno.matrix(df_, figsize=(12, 5))
257
+ msno.matrix(df_, figsize=missing_figsize)
256
258
 
257
259
 
258
260
  def pivot_df(