rgwfuncs 0.0.59__py3-none-any.whl → 0.0.60__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
rgwfuncs/df_lib.py CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
643
643
  top_n_values = frequency.nlargest(n)
644
644
  report[column] = {str(value): str(count)
645
645
  for value, count in top_n_values.items()}
646
- print(
647
- f"Top {n} unique values for column '{column}':\n{
648
- json.dumps(
649
- report[column],
650
- indent=2)}\n")
646
+ print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
651
647
  else:
652
648
  print(f"Column '{column}' does not exist in the DataFrame.")
653
649
  else:
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
690
686
  report[column] = {
691
687
  str(value): str(count) for value,
692
688
  count in bottom_n_values.items()}
693
- print(
694
- f"Bottom {n} unique values for column '{column}':\n{
695
- json.dumps(
696
- report[column],
697
- indent=2)}\n")
689
+ print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
698
690
  else:
699
691
  print(f"Column '{column}' does not exist in the DataFrame.")
700
692
  else:
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
753
745
  - ValueError: If the DataFrame is `None`.
754
746
  """
755
747
  if df is not None:
756
- memory_usage = df.memory_usage(deep=True).sum(
757
- ) / (1024 * 1024) # Convert bytes to MB
748
+ memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
758
749
  print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
759
750
  else:
760
751
  raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -1234,9 +1225,7 @@ def append_ranged_classification_column(
1234
1225
  for r in range_list
1235
1226
  )
1236
1227
 
1237
- labels = [f"{pad_number(range_list[i],
1238
- max_integer_length)} to {pad_number(range_list[i + 1],
1239
- max_integer_length)}" for i in range(len(range_list) - 1)]
1228
+ labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
1240
1229
 
1241
1230
  # Ensure the target column is numeric
1242
1231
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
@@ -1379,8 +1368,7 @@ def rename_columns(df: pd.DataFrame,
1379
1368
  A new DataFrame with columns renamed.
1380
1369
  """
1381
1370
  if df is None:
1382
- raise ValueError(
1383
- "No DataFrame to rename columns. Please provide a valid DataFrame.")
1371
+ raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
1384
1372
 
1385
1373
  return df.rename(columns=rename_pairs)
1386
1374
 
@@ -1398,8 +1386,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
1398
1386
  A new DataFrame sorted by specified columns.
1399
1387
  """
1400
1388
  if df is None:
1401
- raise ValueError(
1402
- "No DataFrame to sort. Please provide a valid DataFrame.")
1389
+ raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
1403
1390
 
1404
1391
  col_names = []
1405
1392
  asc_order = []
@@ -1434,8 +1421,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1434
1421
  A new DataFrame with XGB_TYPE labels appended.
1435
1422
  """
1436
1423
  if df is None:
1437
- raise ValueError(
1438
- "No DataFrame to add labels. Please provide a valid DataFrame.")
1424
+ raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
1439
1425
 
1440
1426
  ratios = list(map(int, ratio_str.split(':')))
1441
1427
  total_ratio = sum(ratios)
@@ -1452,8 +1438,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1452
1438
  labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
1453
1439
  validate_rows + ['TEST'] * test_rows
1454
1440
  else:
1455
- raise ValueError(
1456
- "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1441
+ raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1457
1442
 
1458
1443
  df_with_labels = df.copy()
1459
1444
  df_with_labels['XGB_TYPE'] = labels
@@ -1483,8 +1468,7 @@ def append_xgb_regression_predictions(
1483
1468
  DataFrame with predictions appended.
1484
1469
  """
1485
1470
  if df is None or 'XGB_TYPE' not in df.columns:
1486
- raise ValueError(
1487
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1471
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1488
1472
 
1489
1473
  features = feature_cols.replace(' ', '').split(',')
1490
1474
 
@@ -1558,8 +1542,7 @@ def append_xgb_logistic_regression_predictions(
1558
1542
  DataFrame with predictions appended.
1559
1543
  """
1560
1544
  if df is None or 'XGB_TYPE' not in df.columns:
1561
- raise ValueError(
1562
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1545
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1563
1546
 
1564
1547
  features = feature_cols.replace(' ', '').split(',')
1565
1548
 
@@ -1603,8 +1586,7 @@ def append_xgb_logistic_regression_predictions(
1603
1586
  if model_path:
1604
1587
  model.save_model(model_path)
1605
1588
 
1606
- columns_order = [col for col in df.columns if col not in [
1607
- 'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1589
+ columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1608
1590
  df = df[columns_order]
1609
1591
 
1610
1592
  return df
@@ -1852,8 +1834,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1852
1834
  ValueError: If the DataFrames do not have the same columns.
1853
1835
  """
1854
1836
  if set(df1.columns) != set(df2.columns):
1855
- raise ValueError(
1856
- "Both DataFrames must have the same columns for a union join")
1837
+ raise ValueError("Both DataFrames must have the same columns for a union join")
1857
1838
 
1858
1839
  result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1859
1840
  return result_df
@@ -1874,8 +1855,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1874
1855
  ValueError: If the DataFrames do not have the same columns.
1875
1856
  """
1876
1857
  if set(df1.columns) != set(df2.columns):
1877
- raise ValueError(
1878
- "Both DataFrames must have the same columns for a bag union join")
1858
+ raise ValueError("Both DataFrames must have the same columns for a bag union join")
1879
1859
 
1880
1860
  result_df = pd.concat([df1, df2], ignore_index=True)
1881
1861
  return result_df
@@ -2024,12 +2004,7 @@ def sync_dataframe_to_sqlite_database(
2024
2004
  cursor.execute(f"PRAGMA table_info({new_table_name})")
2025
2005
  if cursor.fetchall() == []: # Table does not exist
2026
2006
  # Create a table using the DataFrame's column names and types
2027
- columns_with_types = ', '.join(
2028
- f'"{col}" {
2029
- map_dtype(dtype)}' for col,
2030
- dtype in zip(
2031
- df.columns,
2032
- df.dtypes))
2007
+ columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
2033
2008
  create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
2034
2009
  conn.execute(create_table_query)
2035
2010
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.59
3
+ Version: 0.0.60
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1,12 +1,12 @@
1
1
  rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
2
2
  rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=EJRGHlYIol5UshuGO6m8MoYNjZMmPSU7W-sh1SDclfw,68947
3
+ rgwfuncs/df_lib.py,sha256=XhqHYcrXGEOOqB4Z0Y-ASViy6_R_Df5f7ZGh66RIP6w,68420
4
4
  rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
5
5
  rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
6
6
  rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
7
- rgwfuncs-0.0.59.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.59.dist-info/METADATA,sha256=h6wuBkH2hs3T4wdMCa8ACuhP3xUrUKU3iSZ7Hfg0RoU,58951
9
- rgwfuncs-0.0.59.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
- rgwfuncs-0.0.59.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.59.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.59.dist-info/RECORD,,
7
+ rgwfuncs-0.0.60.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.60.dist-info/METADATA,sha256=JareUKefKcxdm0rXKzudN9zjw9ljII5o3Llx7o1JrcA,58951
9
+ rgwfuncs-0.0.60.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
+ rgwfuncs-0.0.60.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.60.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.60.dist-info/RECORD,,