rgwfuncs 0.0.58__tar.gz → 0.0.60__tar.gz

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: rgwfuncs
- Version: 0.0.58
+ Version: 0.0.60
  Summary: A functional programming paradigm for mathematical modelling and data science
  Home-page: https://github.com/ryangerardwilson/rgwfunc
  Author: Ryan Gerard Wilson
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "rgwfuncs"
- version = "0.0.58"
+ version = "0.0.60"
  authors = [
  { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
  ]
@@ -1,6 +1,6 @@
  [metadata]
  name = rgwfuncs
- version = 0.0.58
+ version = 0.0.60
  author = Ryan Gerard Wilson
  author_email = ryangerardwilson@gmail.com
  description = A functional programming paradigm for mathematical modelling and data science
@@ -212,9 +212,7 @@ def update_rows(

  invalid_cols = [col for col in updates if col not in df.columns]
  if invalid_cols:
- raise ValueError(
- f"Columns {
- ', '.join(invalid_cols)} do not exist in the DataFrame.")
+ raise ValueError(f"Columns {', '.join(invalid_cols)} do not exist in the DataFrame.")

  new_df = df.copy()
  for col_name, new_value in updates.items():
@@ -645,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
  top_n_values = frequency.nlargest(n)
  report[column] = {str(value): str(count)
  for value, count in top_n_values.items()}
- print(
- f"Top {n} unique values for column '{column}':\n{
- json.dumps(
- report[column],
- indent=2)}\n")
+ print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
  else:
  print(f"Column '{column}' does not exist in the DataFrame.")
  else:
@@ -692,11 +686,7 @@ def bottom_n_unique_values(
  report[column] = {
  str(value): str(count) for value,
  count in bottom_n_values.items()}
- print(
- f"Bottom {n} unique values for column '{column}':\n{
- json.dumps(
- report[column],
- indent=2)}\n")
+ print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
  else:
  print(f"Column '{column}' does not exist in the DataFrame.")
  else:
@@ -755,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
  - ValueError: If the DataFrame is `None`.
  """
  if df is not None:
- memory_usage = df.memory_usage(deep=True).sum(
- ) / (1024 * 1024) # Convert bytes to MB
+ memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
  print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
  else:
  raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -1236,9 +1225,7 @@ def append_ranged_classification_column(
  for r in range_list
  )

- labels = [f"{pad_number(range_list[i],
- max_integer_length)} to {pad_number(range_list[i + 1],
- max_integer_length)}" for i in range(len(range_list) - 1)]
+ labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]

  # Ensure the target column is numeric
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
@@ -1381,8 +1368,7 @@ def rename_columns(df: pd.DataFrame,
  A new DataFrame with columns renamed.
  """
  if df is None:
- raise ValueError(
- "No DataFrame to rename columns. Please provide a valid DataFrame.")
+ raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")

  return df.rename(columns=rename_pairs)

@@ -1400,8 +1386,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
  A new DataFrame sorted by specified columns.
  """
  if df is None:
- raise ValueError(
- "No DataFrame to sort. Please provide a valid DataFrame.")
+ raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")

  col_names = []
  asc_order = []
@@ -1436,8 +1421,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
  A new DataFrame with XGB_TYPE labels appended.
  """
  if df is None:
- raise ValueError(
- "No DataFrame to add labels. Please provide a valid DataFrame.")
+ raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")

  ratios = list(map(int, ratio_str.split(':')))
  total_ratio = sum(ratios)
@@ -1454,8 +1438,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
  labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
  validate_rows + ['TEST'] * test_rows
  else:
- raise ValueError(
- "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
+ raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")

  df_with_labels = df.copy()
  df_with_labels['XGB_TYPE'] = labels
@@ -1485,8 +1468,7 @@ def append_xgb_regression_predictions(
  DataFrame with predictions appended.
  """
  if df is None or 'XGB_TYPE' not in df.columns:
- raise ValueError(
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")

  features = feature_cols.replace(' ', '').split(',')

@@ -1560,8 +1542,7 @@ def append_xgb_logistic_regression_predictions(
  DataFrame with predictions appended.
  """
  if df is None or 'XGB_TYPE' not in df.columns:
- raise ValueError(
- "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+ raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")

  features = feature_cols.replace(' ', '').split(',')

@@ -1605,8 +1586,7 @@ def append_xgb_logistic_regression_predictions(
  if model_path:
  model.save_model(model_path)

- columns_order = [col for col in df.columns if col not in [
- 'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
+ columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]

  df = df[columns_order]

  return df
@@ -1854,8 +1834,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  ValueError: If the DataFrames do not have the same columns.
  """
  if set(df1.columns) != set(df2.columns):
- raise ValueError(
- "Both DataFrames must have the same columns for a union join")
+ raise ValueError("Both DataFrames must have the same columns for a union join")

  result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
  return result_df
@@ -1876,8 +1855,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  ValueError: If the DataFrames do not have the same columns.
  """
  if set(df1.columns) != set(df2.columns):
- raise ValueError(
- "Both DataFrames must have the same columns for a bag union join")
+ raise ValueError("Both DataFrames must have the same columns for a bag union join")

  result_df = pd.concat([df1, df2], ignore_index=True)
  return result_df
@@ -2026,12 +2004,7 @@ def sync_dataframe_to_sqlite_database(
  cursor.execute(f"PRAGMA table_info({new_table_name})")
  if cursor.fetchall() == []: # Table does not exist
  # Create a table using the DataFrame's column names and types
- columns_with_types = ', '.join(
- f'"{col}" {
- map_dtype(dtype)}' for col,
- dtype in zip(
- df.columns,
- df.dtypes))
+ columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
  create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
  conn.execute(create_table_query)

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: rgwfuncs
- Version: 0.0.58
+ Version: 0.0.60
  Summary: A functional programming paradigm for mathematical modelling and data science
  Home-page: https://github.com/ryangerardwilson/rgwfunc
  Author: Ryan Gerard Wilson
File without changes
File without changes