rgwfuncs 0.0.59__py3-none-any.whl → 0.0.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/df_lib.py CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
643
643
  top_n_values = frequency.nlargest(n)
644
644
  report[column] = {str(value): str(count)
645
645
  for value, count in top_n_values.items()}
646
- print(
647
- f"Top {n} unique values for column '{column}':\n{
648
- json.dumps(
649
- report[column],
650
- indent=2)}\n")
646
+ print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
651
647
  else:
652
648
  print(f"Column '{column}' does not exist in the DataFrame.")
653
649
  else:
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
690
686
  report[column] = {
691
687
  str(value): str(count) for value,
692
688
  count in bottom_n_values.items()}
693
- print(
694
- f"Bottom {n} unique values for column '{column}':\n{
695
- json.dumps(
696
- report[column],
697
- indent=2)}\n")
689
+ print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
698
690
  else:
699
691
  print(f"Column '{column}' does not exist in the DataFrame.")
700
692
  else:
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
753
745
  - ValueError: If the DataFrame is `None`.
754
746
  """
755
747
  if df is not None:
756
- memory_usage = df.memory_usage(deep=True).sum(
757
- ) / (1024 * 1024) # Convert bytes to MB
748
+ memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
758
749
  print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
759
750
  else:
760
751
  raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -900,8 +891,7 @@ def send_dataframe_via_telegram(
900
891
  'caption': message or ''}
901
892
  files = {'document': file}
902
893
  response = requests.post(
903
- f"https://api.telegram.org/bot{
904
- bot_config['bot_token']}/sendDocument",
894
+ f"https://api.telegram.org/bot{bot_config['bot_token']}/sendDocument",
905
895
  data=payload,
906
896
  files=files)
907
897
  if remove_after_send and os.path.exists(file_name):
@@ -1010,8 +1000,7 @@ def send_data_to_email(
1010
1000
  encoders.encode_base64(part)
1011
1001
  part.add_header(
1012
1002
  'Content-Disposition',
1013
- f'attachment; filename={
1014
- os.path.basename(tmp_file_name)}')
1003
+ f'attachment; filename={os.path.basename(tmp_file_name)}')
1015
1004
  message.attach(part)
1016
1005
 
1017
1006
  if remove_after_send and os.path.exists(tmp_file_name):
@@ -1234,9 +1223,7 @@ def append_ranged_classification_column(
1234
1223
  for r in range_list
1235
1224
  )
1236
1225
 
1237
- labels = [f"{pad_number(range_list[i],
1238
- max_integer_length)} to {pad_number(range_list[i + 1],
1239
- max_integer_length)}" for i in range(len(range_list) - 1)]
1226
+ labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
1240
1227
 
1241
1228
  # Ensure the target column is numeric
1242
1229
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
@@ -1379,8 +1366,7 @@ def rename_columns(df: pd.DataFrame,
1379
1366
  A new DataFrame with columns renamed.
1380
1367
  """
1381
1368
  if df is None:
1382
- raise ValueError(
1383
- "No DataFrame to rename columns. Please provide a valid DataFrame.")
1369
+ raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
1384
1370
 
1385
1371
  return df.rename(columns=rename_pairs)
1386
1372
 
@@ -1398,8 +1384,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
1398
1384
  A new DataFrame sorted by specified columns.
1399
1385
  """
1400
1386
  if df is None:
1401
- raise ValueError(
1402
- "No DataFrame to sort. Please provide a valid DataFrame.")
1387
+ raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
1403
1388
 
1404
1389
  col_names = []
1405
1390
  asc_order = []
@@ -1434,8 +1419,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1434
1419
  A new DataFrame with XGB_TYPE labels appended.
1435
1420
  """
1436
1421
  if df is None:
1437
- raise ValueError(
1438
- "No DataFrame to add labels. Please provide a valid DataFrame.")
1422
+ raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
1439
1423
 
1440
1424
  ratios = list(map(int, ratio_str.split(':')))
1441
1425
  total_ratio = sum(ratios)
@@ -1452,8 +1436,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1452
1436
  labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
1453
1437
  validate_rows + ['TEST'] * test_rows
1454
1438
  else:
1455
- raise ValueError(
1456
- "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1439
+ raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1457
1440
 
1458
1441
  df_with_labels = df.copy()
1459
1442
  df_with_labels['XGB_TYPE'] = labels
@@ -1483,8 +1466,7 @@ def append_xgb_regression_predictions(
1483
1466
  DataFrame with predictions appended.
1484
1467
  """
1485
1468
  if df is None or 'XGB_TYPE' not in df.columns:
1486
- raise ValueError(
1487
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1469
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1488
1470
 
1489
1471
  features = feature_cols.replace(' ', '').split(',')
1490
1472
 
@@ -1558,8 +1540,7 @@ def append_xgb_logistic_regression_predictions(
1558
1540
  DataFrame with predictions appended.
1559
1541
  """
1560
1542
  if df is None or 'XGB_TYPE' not in df.columns:
1561
- raise ValueError(
1562
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1543
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1563
1544
 
1564
1545
  features = feature_cols.replace(' ', '').split(',')
1565
1546
 
@@ -1603,8 +1584,7 @@ def append_xgb_logistic_regression_predictions(
1603
1584
  if model_path:
1604
1585
  model.save_model(model_path)
1605
1586
 
1606
- columns_order = [col for col in df.columns if col not in [
1607
- 'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1587
+ columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1608
1588
  df = df[columns_order]
1609
1589
 
1610
1590
  return df
@@ -1852,8 +1832,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1852
1832
  ValueError: If the DataFrames do not have the same columns.
1853
1833
  """
1854
1834
  if set(df1.columns) != set(df2.columns):
1855
- raise ValueError(
1856
- "Both DataFrames must have the same columns for a union join")
1835
+ raise ValueError("Both DataFrames must have the same columns for a union join")
1857
1836
 
1858
1837
  result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1859
1838
  return result_df
@@ -1874,8 +1853,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1874
1853
  ValueError: If the DataFrames do not have the same columns.
1875
1854
  """
1876
1855
  if set(df1.columns) != set(df2.columns):
1877
- raise ValueError(
1878
- "Both DataFrames must have the same columns for a bag union join")
1856
+ raise ValueError("Both DataFrames must have the same columns for a bag union join")
1879
1857
 
1880
1858
  result_df = pd.concat([df1, df2], ignore_index=True)
1881
1859
  return result_df
@@ -2024,12 +2002,7 @@ def sync_dataframe_to_sqlite_database(
2024
2002
  cursor.execute(f"PRAGMA table_info({new_table_name})")
2025
2003
  if cursor.fetchall() == []: # Table does not exist
2026
2004
  # Create a table using the DataFrame's column names and types
2027
- columns_with_types = ', '.join(
2028
- f'"{col}" {
2029
- map_dtype(dtype)}' for col,
2030
- dtype in zip(
2031
- df.columns,
2032
- df.dtypes))
2005
+ columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
2033
2006
  create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
2034
2007
  conn.execute(create_table_query)
2035
2008
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.59
3
+ Version: 0.0.61
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1,12 +1,12 @@
1
1
  rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
2
2
  rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=EJRGHlYIol5UshuGO6m8MoYNjZMmPSU7W-sh1SDclfw,68947
3
+ rgwfuncs/df_lib.py,sha256=xjacr7JrjR8gZ1xqqOVvevFzK9N646wm5w1YEIxbGsE,68370
4
4
  rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
5
5
  rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
6
6
  rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
7
- rgwfuncs-0.0.59.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.59.dist-info/METADATA,sha256=h6wuBkH2hs3T4wdMCa8ACuhP3xUrUKU3iSZ7Hfg0RoU,58951
9
- rgwfuncs-0.0.59.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
- rgwfuncs-0.0.59.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.59.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.59.dist-info/RECORD,,
7
+ rgwfuncs-0.0.61.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.61.dist-info/METADATA,sha256=UcHNBUomOeWcpes0FL2qZysH37TtOPPR58DofFphl4o,58951
9
+ rgwfuncs-0.0.61.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
+ rgwfuncs-0.0.61.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.61.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.61.dist-info/RECORD,,