rgwfuncs 0.0.59__py3-none-any.whl → 0.0.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +14 -39
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/METADATA +1 -1
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/RECORD +7 -7
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
|
|
643
643
|
top_n_values = frequency.nlargest(n)
|
644
644
|
report[column] = {str(value): str(count)
|
645
645
|
for value, count in top_n_values.items()}
|
646
|
-
print(
|
647
|
-
f"Top {n} unique values for column '{column}':\n{
|
648
|
-
json.dumps(
|
649
|
-
report[column],
|
650
|
-
indent=2)}\n")
|
646
|
+
print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
|
651
647
|
else:
|
652
648
|
print(f"Column '{column}' does not exist in the DataFrame.")
|
653
649
|
else:
|
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
|
|
690
686
|
report[column] = {
|
691
687
|
str(value): str(count) for value,
|
692
688
|
count in bottom_n_values.items()}
|
693
|
-
print(
|
694
|
-
f"Bottom {n} unique values for column '{column}':\n{
|
695
|
-
json.dumps(
|
696
|
-
report[column],
|
697
|
-
indent=2)}\n")
|
689
|
+
print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
|
698
690
|
else:
|
699
691
|
print(f"Column '{column}' does not exist in the DataFrame.")
|
700
692
|
else:
|
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
|
|
753
745
|
- ValueError: If the DataFrame is `None`.
|
754
746
|
"""
|
755
747
|
if df is not None:
|
756
|
-
memory_usage = df.memory_usage(deep=True).sum(
|
757
|
-
) / (1024 * 1024) # Convert bytes to MB
|
748
|
+
memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
|
758
749
|
print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
|
759
750
|
else:
|
760
751
|
raise ValueError("No DataFrame to print. Please provide a DataFrame.")
|
@@ -1234,9 +1225,7 @@ def append_ranged_classification_column(
|
|
1234
1225
|
for r in range_list
|
1235
1226
|
)
|
1236
1227
|
|
1237
|
-
labels = [f"{pad_number(range_list[i],
|
1238
|
-
max_integer_length)} to {pad_number(range_list[i + 1],
|
1239
|
-
max_integer_length)}" for i in range(len(range_list) - 1)]
|
1228
|
+
labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
|
1240
1229
|
|
1241
1230
|
# Ensure the target column is numeric
|
1242
1231
|
df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
|
@@ -1379,8 +1368,7 @@ def rename_columns(df: pd.DataFrame,
|
|
1379
1368
|
A new DataFrame with columns renamed.
|
1380
1369
|
"""
|
1381
1370
|
if df is None:
|
1382
|
-
raise ValueError(
|
1383
|
-
"No DataFrame to rename columns. Please provide a valid DataFrame.")
|
1371
|
+
raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
|
1384
1372
|
|
1385
1373
|
return df.rename(columns=rename_pairs)
|
1386
1374
|
|
@@ -1398,8 +1386,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
|
|
1398
1386
|
A new DataFrame sorted by specified columns.
|
1399
1387
|
"""
|
1400
1388
|
if df is None:
|
1401
|
-
raise ValueError(
|
1402
|
-
"No DataFrame to sort. Please provide a valid DataFrame.")
|
1389
|
+
raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
|
1403
1390
|
|
1404
1391
|
col_names = []
|
1405
1392
|
asc_order = []
|
@@ -1434,8 +1421,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
|
|
1434
1421
|
A new DataFrame with XGB_TYPE labels appended.
|
1435
1422
|
"""
|
1436
1423
|
if df is None:
|
1437
|
-
raise ValueError(
|
1438
|
-
"No DataFrame to add labels. Please provide a valid DataFrame.")
|
1424
|
+
raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
|
1439
1425
|
|
1440
1426
|
ratios = list(map(int, ratio_str.split(':')))
|
1441
1427
|
total_ratio = sum(ratios)
|
@@ -1452,8 +1438,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
|
|
1452
1438
|
labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
|
1453
1439
|
validate_rows + ['TEST'] * test_rows
|
1454
1440
|
else:
|
1455
|
-
raise ValueError(
|
1456
|
-
"Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
|
1441
|
+
raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
|
1457
1442
|
|
1458
1443
|
df_with_labels = df.copy()
|
1459
1444
|
df_with_labels['XGB_TYPE'] = labels
|
@@ -1483,8 +1468,7 @@ def append_xgb_regression_predictions(
|
|
1483
1468
|
DataFrame with predictions appended.
|
1484
1469
|
"""
|
1485
1470
|
if df is None or 'XGB_TYPE' not in df.columns:
|
1486
|
-
raise ValueError(
|
1487
|
-
"DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1471
|
+
raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1488
1472
|
|
1489
1473
|
features = feature_cols.replace(' ', '').split(',')
|
1490
1474
|
|
@@ -1558,8 +1542,7 @@ def append_xgb_logistic_regression_predictions(
|
|
1558
1542
|
DataFrame with predictions appended.
|
1559
1543
|
"""
|
1560
1544
|
if df is None or 'XGB_TYPE' not in df.columns:
|
1561
|
-
raise ValueError(
|
1562
|
-
"DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1545
|
+
raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1563
1546
|
|
1564
1547
|
features = feature_cols.replace(' ', '').split(',')
|
1565
1548
|
|
@@ -1603,8 +1586,7 @@ def append_xgb_logistic_regression_predictions(
|
|
1603
1586
|
if model_path:
|
1604
1587
|
model.save_model(model_path)
|
1605
1588
|
|
1606
|
-
columns_order = [col for col in df.columns if col not in [
|
1607
|
-
'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
|
1589
|
+
columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
|
1608
1590
|
df = df[columns_order]
|
1609
1591
|
|
1610
1592
|
return df
|
@@ -1852,8 +1834,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1852
1834
|
ValueError: If the DataFrames do not have the same columns.
|
1853
1835
|
"""
|
1854
1836
|
if set(df1.columns) != set(df2.columns):
|
1855
|
-
raise ValueError(
|
1856
|
-
"Both DataFrames must have the same columns for a union join")
|
1837
|
+
raise ValueError("Both DataFrames must have the same columns for a union join")
|
1857
1838
|
|
1858
1839
|
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1859
1840
|
return result_df
|
@@ -1874,8 +1855,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1874
1855
|
ValueError: If the DataFrames do not have the same columns.
|
1875
1856
|
"""
|
1876
1857
|
if set(df1.columns) != set(df2.columns):
|
1877
|
-
raise ValueError(
|
1878
|
-
"Both DataFrames must have the same columns for a bag union join")
|
1858
|
+
raise ValueError("Both DataFrames must have the same columns for a bag union join")
|
1879
1859
|
|
1880
1860
|
result_df = pd.concat([df1, df2], ignore_index=True)
|
1881
1861
|
return result_df
|
@@ -2024,12 +2004,7 @@ def sync_dataframe_to_sqlite_database(
|
|
2024
2004
|
cursor.execute(f"PRAGMA table_info({new_table_name})")
|
2025
2005
|
if cursor.fetchall() == []: # Table does not exist
|
2026
2006
|
# Create a table using the DataFrame's column names and types
|
2027
|
-
columns_with_types = ', '.join(
|
2028
|
-
f'"{col}" {
|
2029
|
-
map_dtype(dtype)}' for col,
|
2030
|
-
dtype in zip(
|
2031
|
-
df.columns,
|
2032
|
-
df.dtypes))
|
2007
|
+
columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
|
2033
2008
|
create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
|
2034
2009
|
conn.execute(create_table_query)
|
2035
2010
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
|
2
2
|
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=
|
3
|
+
rgwfuncs/df_lib.py,sha256=XhqHYcrXGEOOqB4Z0Y-ASViy6_R_Df5f7ZGh66RIP6w,68420
|
4
4
|
rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
|
5
5
|
rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
|
6
6
|
rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
|
7
|
-
rgwfuncs-0.0.59.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
-
rgwfuncs-0.0.
|
9
|
-
rgwfuncs-0.0.59.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
-
rgwfuncs-0.0.59.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
-
rgwfuncs-0.0.59.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
-
rgwfuncs-0.0.59.dist-info/RECORD,,
|
7
|
+
rgwfuncs-0.0.60.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.60.dist-info/METADATA,sha256=JareUKefKcxdm0rXKzudN9zjw9ljII5o3Llx7o1JrcA,58951
|
9
|
+
rgwfuncs-0.0.60.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
+
rgwfuncs-0.0.60.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.60.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.60.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|