rgwfuncs 0.0.59__py3-none-any.whl → 0.0.61__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
rgwfuncs/df_lib.py CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
643
643
  top_n_values = frequency.nlargest(n)
644
644
  report[column] = {str(value): str(count)
645
645
  for value, count in top_n_values.items()}
646
- print(
647
- f"Top {n} unique values for column '{column}':\n{
648
- json.dumps(
649
- report[column],
650
- indent=2)}\n")
646
+ print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
651
647
  else:
652
648
  print(f"Column '{column}' does not exist in the DataFrame.")
653
649
  else:
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
690
686
  report[column] = {
691
687
  str(value): str(count) for value,
692
688
  count in bottom_n_values.items()}
693
- print(
694
- f"Bottom {n} unique values for column '{column}':\n{
695
- json.dumps(
696
- report[column],
697
- indent=2)}\n")
689
+ print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
698
690
  else:
699
691
  print(f"Column '{column}' does not exist in the DataFrame.")
700
692
  else:
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
753
745
  - ValueError: If the DataFrame is `None`.
754
746
  """
755
747
  if df is not None:
756
- memory_usage = df.memory_usage(deep=True).sum(
757
- ) / (1024 * 1024) # Convert bytes to MB
748
+ memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
758
749
  print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
759
750
  else:
760
751
  raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -900,8 +891,7 @@ def send_dataframe_via_telegram(
900
891
  'caption': message or ''}
901
892
  files = {'document': file}
902
893
  response = requests.post(
903
- f"https://api.telegram.org/bot{
904
- bot_config['bot_token']}/sendDocument",
894
+ f"https://api.telegram.org/bot{bot_config['bot_token']}/sendDocument",
905
895
  data=payload,
906
896
  files=files)
907
897
  if remove_after_send and os.path.exists(file_name):
@@ -1010,8 +1000,7 @@ def send_data_to_email(
1010
1000
  encoders.encode_base64(part)
1011
1001
  part.add_header(
1012
1002
  'Content-Disposition',
1013
- f'attachment; filename={
1014
- os.path.basename(tmp_file_name)}')
1003
+ f'attachment; filename={os.path.basename(tmp_file_name)}')
1015
1004
  message.attach(part)
1016
1005
 
1017
1006
  if remove_after_send and os.path.exists(tmp_file_name):
@@ -1234,9 +1223,7 @@ def append_ranged_classification_column(
1234
1223
  for r in range_list
1235
1224
  )
1236
1225
 
1237
- labels = [f"{pad_number(range_list[i],
1238
- max_integer_length)} to {pad_number(range_list[i + 1],
1239
- max_integer_length)}" for i in range(len(range_list) - 1)]
1226
+ labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
1240
1227
 
1241
1228
  # Ensure the target column is numeric
1242
1229
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
@@ -1379,8 +1366,7 @@ def rename_columns(df: pd.DataFrame,
1379
1366
  A new DataFrame with columns renamed.
1380
1367
  """
1381
1368
  if df is None:
1382
- raise ValueError(
1383
- "No DataFrame to rename columns. Please provide a valid DataFrame.")
1369
+ raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
1384
1370
 
1385
1371
  return df.rename(columns=rename_pairs)
1386
1372
 
@@ -1398,8 +1384,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
1398
1384
  A new DataFrame sorted by specified columns.
1399
1385
  """
1400
1386
  if df is None:
1401
- raise ValueError(
1402
- "No DataFrame to sort. Please provide a valid DataFrame.")
1387
+ raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
1403
1388
 
1404
1389
  col_names = []
1405
1390
  asc_order = []
@@ -1434,8 +1419,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1434
1419
  A new DataFrame with XGB_TYPE labels appended.
1435
1420
  """
1436
1421
  if df is None:
1437
- raise ValueError(
1438
- "No DataFrame to add labels. Please provide a valid DataFrame.")
1422
+ raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
1439
1423
 
1440
1424
  ratios = list(map(int, ratio_str.split(':')))
1441
1425
  total_ratio = sum(ratios)
@@ -1452,8 +1436,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
1452
1436
  labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
1453
1437
  validate_rows + ['TEST'] * test_rows
1454
1438
  else:
1455
- raise ValueError(
1456
- "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1439
+ raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
1457
1440
 
1458
1441
  df_with_labels = df.copy()
1459
1442
  df_with_labels['XGB_TYPE'] = labels
@@ -1483,8 +1466,7 @@ def append_xgb_regression_predictions(
1483
1466
  DataFrame with predictions appended.
1484
1467
  """
1485
1468
  if df is None or 'XGB_TYPE' not in df.columns:
1486
- raise ValueError(
1487
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1469
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1488
1470
 
1489
1471
  features = feature_cols.replace(' ', '').split(',')
1490
1472
 
@@ -1558,8 +1540,7 @@ def append_xgb_logistic_regression_predictions(
1558
1540
  DataFrame with predictions appended.
1559
1541
  """
1560
1542
  if df is None or 'XGB_TYPE' not in df.columns:
1561
- raise ValueError(
1562
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1543
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
1563
1544
 
1564
1545
  features = feature_cols.replace(' ', '').split(',')
1565
1546
 
@@ -1603,8 +1584,7 @@ def append_xgb_logistic_regression_predictions(
1603
1584
  if model_path:
1604
1585
  model.save_model(model_path)
1605
1586
 
1606
- columns_order = [col for col in df.columns if col not in [
1607
- 'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1587
+ columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
1608
1588
  df = df[columns_order]
1609
1589
 
1610
1590
  return df
@@ -1852,8 +1832,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1852
1832
  ValueError: If the DataFrames do not have the same columns.
1853
1833
  """
1854
1834
  if set(df1.columns) != set(df2.columns):
1855
- raise ValueError(
1856
- "Both DataFrames must have the same columns for a union join")
1835
+ raise ValueError("Both DataFrames must have the same columns for a union join")
1857
1836
 
1858
1837
  result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1859
1838
  return result_df
@@ -1874,8 +1853,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1874
1853
  ValueError: If the DataFrames do not have the same columns.
1875
1854
  """
1876
1855
  if set(df1.columns) != set(df2.columns):
1877
- raise ValueError(
1878
- "Both DataFrames must have the same columns for a bag union join")
1856
+ raise ValueError("Both DataFrames must have the same columns for a bag union join")
1879
1857
 
1880
1858
  result_df = pd.concat([df1, df2], ignore_index=True)
1881
1859
  return result_df
@@ -2024,12 +2002,7 @@ def sync_dataframe_to_sqlite_database(
2024
2002
  cursor.execute(f"PRAGMA table_info({new_table_name})")
2025
2003
  if cursor.fetchall() == []: # Table does not exist
2026
2004
  # Create a table using the DataFrame's column names and types
2027
- columns_with_types = ', '.join(
2028
- f'"{col}" {
2029
- map_dtype(dtype)}' for col,
2030
- dtype in zip(
2031
- df.columns,
2032
- df.dtypes))
2005
+ columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
2033
2006
  create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
2034
2007
  conn.execute(create_table_query)
2035
2008
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.59
3
+ Version: 0.0.61
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1,12 +1,12 @@
1
1
  rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
2
2
  rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=EJRGHlYIol5UshuGO6m8MoYNjZMmPSU7W-sh1SDclfw,68947
3
+ rgwfuncs/df_lib.py,sha256=xjacr7JrjR8gZ1xqqOVvevFzK9N646wm5w1YEIxbGsE,68370
4
4
  rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
5
5
  rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
6
6
  rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
7
- rgwfuncs-0.0.59.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.59.dist-info/METADATA,sha256=h6wuBkH2hs3T4wdMCa8ACuhP3xUrUKU3iSZ7Hfg0RoU,58951
9
- rgwfuncs-0.0.59.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
- rgwfuncs-0.0.59.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.59.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.59.dist-info/RECORD,,
7
+ rgwfuncs-0.0.61.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.61.dist-info/METADATA,sha256=UcHNBUomOeWcpes0FL2qZysH37TtOPPR58DofFphl4o,58951
9
+ rgwfuncs-0.0.61.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
+ rgwfuncs-0.0.61.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.61.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.61.dist-info/RECORD,,