rgwfuncs 0.0.59__py3-none-any.whl → 0.0.60__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
rgwfuncs/df_lib.py CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
643
643
  top_n_values = frequency.nlargest(n)
644
644
  report[column] = {str(value): str(count)
645
645
  for value, count in top_n_values.items()}
646
- print(
647
- f"Top {n} unique values for column '{column}':\n{
648
- json.dumps(
649
- report[column],
650
- indent=2)}\n")
646
+ print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
651
647
  else:
652
648
  print(f"Column '{column}' does not exist in the DataFrame.")
653
649
  else:
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
690
686
  report[column] = {
691
687
  str(value): str(count) for value,
692
688
  count in bottom_n_values.items()}
693
- print(
694
- f"Bottom {n} unique values for column '{column}':\n{
695
- json.dumps(
696
- report[column],
697
- indent=2)}\n")
689
+ print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
698
690
  else:
699
691
  print(f"Column '{column}' does not exist in the DataFrame.")
700
692
  else:
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
753
745
  - ValueError: If the DataFrame is `None`.
754
746
  """
755
747
  if df is not None:
756
- memory_usage = df.memory_usage(deep=True).sum(
757
- ) / (1024 * 1024) # Convert bytes to MB
748
+ memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
758
749
  print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
759
750
  else:
760
751
  raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -1234,9 +1225,7 @@ def append_ranged_classification_column(
1234
1225
  for r in range_list
1235
1226
  )
1236
1227
 
1237
- labels = [f"{pad_number(range_list[i],
1238
- max_integer_length)} to {pad_number(range_list[i + 1],
1239
- max_integer_length)}" for i in range(len(range_list) - 1)]
1228
+ labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
1240
1229
 
1241
1230
  # Ensure the target column is numeric
1242
1231
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
@@ -1379,8 +1368,7 @@ def rename_columns(df: pd.DataFrame,
1379
1368
  A new DataFrame with columns renamed.
1380
1369
  """
1381
1370
  if df is None:
1382
- raise ValueError(
1383
- "No DataFrame to rename columns. Please provide a valid DataFrame.")
1371
+ raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
1384
1372
 
1385
1373
  return df.rename(columns=rename_pairs)
1386
1374
 
@@ -1398,8 +1386,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
1398
1386
  A new DataFrame sorted by specified columns.
1399
1387
  """
1400
1388
  if df is None:
1401
- raise ValueError(
1402
- "No DataFrame to sort. Please provide a valid DataFrame.")
1389
+ raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
1403
1390
 
1404
1391
  col_names = []
1405
1392
  asc_order = []
@@ -1434,8 +1421,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1434
1421
  A new DataFrame with XGB_TYPE labels appended.
1435
1422
  """
1436
1423
  if df is None:
1437
- raise ValueError(
1438
- "No DataFrame to add labels. Please provide a valid DataFrame.")
1424
+ raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
1439
1425
 
1440
1426
  ratios = list(map(int, ratio_str.split(':')))
1441
1427
  total_ratio = sum(ratios)
@@ -1452,8 +1438,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1452
1438
  labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
1453
1439
  validate_rows + ['TEST'] * test_rows
1454
1440
  else:
1455
- raise ValueError(
1456
- "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1441
+ raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1457
1442
 
1458
1443
  df_with_labels = df.copy()
1459
1444
  df_with_labels['XGB_TYPE'] = labels
@@ -1483,8 +1468,7 @@ def append_xgb_regression_predictions(
1483
1468
  DataFrame with predictions appended.
1484
1469
  """
1485
1470
  if df is None or 'XGB_TYPE' not in df.columns:
1486
- raise ValueError(
1487
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1471
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1488
1472
 
1489
1473
  features = feature_cols.replace(' ', '').split(',')
1490
1474
 
@@ -1558,8 +1542,7 @@ def append_xgb_logistic_regression_predictions(
1558
1542
  DataFrame with predictions appended.
1559
1543
  """
1560
1544
  if df is None or 'XGB_TYPE' not in df.columns:
1561
- raise ValueError(
1562
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1545
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1563
1546
 
1564
1547
  features = feature_cols.replace(' ', '').split(',')
1565
1548
 
@@ -1603,8 +1586,7 @@ def append_xgb_logistic_regression_predictions(
1603
1586
  if model_path:
1604
1587
  model.save_model(model_path)
1605
1588
 
1606
- columns_order = [col for col in df.columns if col not in [
1607
- 'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1589
+ columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1608
1590
  df = df[columns_order]
1609
1591
 
1610
1592
  return df
@@ -1852,8 +1834,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1852
1834
  ValueError: If the DataFrames do not have the same columns.
1853
1835
  """
1854
1836
  if set(df1.columns) != set(df2.columns):
1855
- raise ValueError(
1856
- "Both DataFrames must have the same columns for a union join")
1837
+ raise ValueError("Both DataFrames must have the same columns for a union join")
1857
1838
 
1858
1839
  result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1859
1840
  return result_df
@@ -1874,8 +1855,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1874
1855
  ValueError: If the DataFrames do not have the same columns.
1875
1856
  """
1876
1857
  if set(df1.columns) != set(df2.columns):
1877
- raise ValueError(
1878
- "Both DataFrames must have the same columns for a bag union join")
1858
+ raise ValueError("Both DataFrames must have the same columns for a bag union join")
1879
1859
 
1880
1860
  result_df = pd.concat([df1, df2], ignore_index=True)
1881
1861
  return result_df
@@ -2024,12 +2004,7 @@ def sync_dataframe_to_sqlite_database(
2024
2004
  cursor.execute(f"PRAGMA table_info({new_table_name})")
2025
2005
  if cursor.fetchall() == []: # Table does not exist
2026
2006
  # Create a table using the DataFrame's column names and types
2027
- columns_with_types = ', '.join(
2028
- f'"{col}" {
2029
- map_dtype(dtype)}' for col,
2030
- dtype in zip(
2031
- df.columns,
2032
- df.dtypes))
2007
+ columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
2033
2008
  create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
2034
2009
  conn.execute(create_table_query)
2035
2010
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.59
3
+ Version: 0.0.60
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1,12 +1,12 @@
1
1
  rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
2
2
  rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=EJRGHlYIol5UshuGO6m8MoYNjZMmPSU7W-sh1SDclfw,68947
3
+ rgwfuncs/df_lib.py,sha256=XhqHYcrXGEOOqB4Z0Y-ASViy6_R_Df5f7ZGh66RIP6w,68420
4
4
  rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
5
5
  rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
6
6
  rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
7
- rgwfuncs-0.0.59.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.59.dist-info/METADATA,sha256=h6wuBkH2hs3T4wdMCa8ACuhP3xUrUKU3iSZ7Hfg0RoU,58951
9
- rgwfuncs-0.0.59.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
- rgwfuncs-0.0.59.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.59.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.59.dist-info/RECORD,,
7
+ rgwfuncs-0.0.60.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.60.dist-info/METADATA,sha256=JareUKefKcxdm0rXKzudN9zjw9ljII5o3Llx7o1JrcA,58951
9
+ rgwfuncs-0.0.60.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
+ rgwfuncs-0.0.60.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.60.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.60.dist-info/RECORD,,