rgwfuncs 0.0.59__py3-none-any.whl → 0.0.60__py3-none-any.whl
- rgwfuncs/df_lib.py +14 -39
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/METADATA +1 -1
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/RECORD +7 -7
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
                 top_n_values = frequency.nlargest(n)
                 report[column] = {str(value): str(count)
                                   for value, count in top_n_values.items()}
-                print(
-                    f"Top {n} unique values for column '{column}':\n{
-                        json.dumps(
-                            report[column],
-                            indent=2)}\n")
+                print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
         else:
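The collapsed call emits the same output as before: a JSON dump of the top-n frequency map for each requested column. A minimal sketch of that behaviour, using a throwaway DataFrame made up for illustration:

    import json
    import pandas as pd

    df = pd.DataFrame({"city": ["Pune", "Pune", "Agra", "Agra", "Agra", "Delhi"]})
    frequency = df["city"].value_counts(dropna=False)
    top_n_values = frequency.nlargest(2)
    report = {str(value): str(count) for value, count in top_n_values.items()}
    # Mirrors the single-line print introduced in 0.0.60
    print(f"Top 2 unique values for column 'city':\n{json.dumps(report, indent=2)}\n")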
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
                 report[column] = {
                     str(value): str(count) for value,
                     count in bottom_n_values.items()}
-                print(
-                    f"Bottom {n} unique values for column '{column}':\n{
-                        json.dumps(
-                            report[column],
-                            indent=2)}\n")
+                print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
         else:
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
         - ValueError: If the DataFrame is `None`.
     """
     if df is not None:
-        memory_usage = df.memory_usage(deep=True).sum(
-        ) / (1024 * 1024)  # Convert bytes to MB
+        memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024)  # Convert bytes to MB
         print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
     else:
         raise ValueError("No DataFrame to print. Please provide a DataFrame.")
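The joined expression still computes deep memory usage in bytes and converts it to megabytes. A standalone sketch with an illustrative DataFrame:

    import pandas as pd

    df = pd.DataFrame({"a": range(1000), "b": ["x"] * 1000})
    memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024)  # bytes -> MB
    print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")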
@@ -1234,9 +1225,7 @@ def append_ranged_classification_column(
         for r in range_list
     )

-    labels = [f"{pad_number(range_list[i],
-                            max_integer_length)} to {pad_number(range_list[i + 1],
-                                                                max_integer_length)}" for i in range(len(range_list) - 1)]
+    labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]

     # Ensure the target column is numeric
     df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
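The one-line comprehension builds boundary labels such as '000 to 010' from consecutive entries of range_list. A sketch of the idea using a stand-in pad_number helper (the real helper is defined inside append_ranged_classification_column and may behave differently):

    def pad_number(num, width):
        # Stand-in for the library's internal helper: zero-pad to a fixed width.
        return str(num).zfill(width)

    range_list = [0, 10, 50, 100]
    max_integer_length = len(str(max(range_list)))
    labels = [f"{pad_number(range_list[i], max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}"
              for i in range(len(range_list) - 1)]
    print(labels)  # ['000 to 010', '010 to 050', '050 to 100']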
@@ -1379,8 +1368,7 @@ def rename_columns(df: pd.DataFrame,
         A new DataFrame with columns renamed.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to rename columns. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")

     return df.rename(columns=rename_pairs)

@@ -1398,8 +1386,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
         A new DataFrame sorted by specified columns.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to sort. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")

     col_names = []
     asc_order = []
@@ -1434,8 +1421,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         A new DataFrame with XGB_TYPE labels appended.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to add labels. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")

     ratios = list(map(int, ratio_str.split(':')))
     total_ratio = sum(ratios)
@@ -1452,8 +1438,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
             validate_rows + ['TEST'] * test_rows
     else:
-        raise ValueError(
-            "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
+        raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")

     df_with_labels = df.copy()
     df_with_labels['XGB_TYPE'] = labels
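For context, append_xgb_labels assigns the labels by splitting the row count according to the colon-separated ratio string. A rough sketch of that arithmetic (the rounding used here is illustrative, not necessarily the library's exact rule):

    ratio_str = "7:2:1"
    num_rows = 100
    ratios = list(map(int, ratio_str.split(":")))
    total_ratio = sum(ratios)
    train_rows = num_rows * ratios[0] // total_ratio
    validate_rows = num_rows * ratios[1] // total_ratio
    test_rows = num_rows - train_rows - validate_rows
    labels = ["TRAIN"] * train_rows + ["VALIDATE"] * validate_rows + ["TEST"] * test_rows
    print(labels.count("TRAIN"), labels.count("VALIDATE"), labels.count("TEST"))  # 70 20 10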
@@ -1483,8 +1468,7 @@ def append_xgb_regression_predictions(
         DataFrame with predictions appended.
     """
     if df is None or 'XGB_TYPE' not in df.columns:
-        raise ValueError(
-            "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+        raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")

     features = feature_cols.replace(' ', '').split(',')

@@ -1558,8 +1542,7 @@ def append_xgb_logistic_regression_predictions(
         DataFrame with predictions appended.
     """
     if df is None or 'XGB_TYPE' not in df.columns:
-        raise ValueError(
-            "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+        raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")

     features = feature_cols.replace(' ', '').split(',')

@@ -1603,8 +1586,7 @@ def append_xgb_logistic_regression_predictions(
     if model_path:
         model.save_model(model_path)

-    columns_order = [col for col in df.columns if col not in [
-        'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
+    columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
     df = df[columns_order]

     return df
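The reordering pushes 'XGB_TYPE', the target column and the prediction column to the end of the frame while preserving the relative order of every other column. A small sketch with made-up column names:

    import pandas as pd

    target_col, pred_col = "price", "price_pred"  # illustrative names, not the library's defaults
    df = pd.DataFrame(columns=["price", "area", "XGB_TYPE", "rooms", "price_pred"])
    columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
    print(columns_order)  # ['area', 'rooms', 'XGB_TYPE', 'price', 'price_pred']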
@@ -1852,8 +1834,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
-            "Both DataFrames must have the same columns for a union join")
+        raise ValueError("Both DataFrames must have the same columns for a union join")

     result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
     return result_df
@@ -1874,8 +1855,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
-            "Both DataFrames must have the same columns for a bag union join")
+        raise ValueError("Both DataFrames must have the same columns for a bag union join")

     result_df = pd.concat([df1, df2], ignore_index=True)
     return result_df
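As the two hunks show, union_join and bag_union_join differ only in the trailing drop_duplicates(): the former removes duplicate rows after concatenation, the latter keeps them. A quick illustration:

    import pandas as pd

    df1 = pd.DataFrame({"id": [1, 2], "val": ["a", "b"]})
    df2 = pd.DataFrame({"id": [2, 3], "val": ["b", "c"]})
    union = pd.concat([df1, df2], ignore_index=True).drop_duplicates()  # duplicate row dropped
    bag_union = pd.concat([df1, df2], ignore_index=True)                # duplicate row kept
    print(len(union), len(bag_union))  # 3 4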
@@ -2024,12 +2004,7 @@ def sync_dataframe_to_sqlite_database(
         cursor.execute(f"PRAGMA table_info({new_table_name})")
         if cursor.fetchall() == []:  # Table does not exist
             # Create a table using the DataFrame's column names and types
-            columns_with_types = ', '.join(
-                f'"{col}" {
-                    map_dtype(dtype)}' for col,
-                dtype in zip(
-                    df.columns,
-                    df.dtypes))
+            columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
             create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
             conn.execute(create_table_query)

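The joined generator expression builds the column-definition clause of the CREATE TABLE statement from the DataFrame's dtypes via the function's map_dtype helper. A sketch with a simplified stand-in mapping (the library's actual dtype-to-SQLite mapping may differ):

    import pandas as pd

    def map_dtype(dtype):
        # Simplified stand-in for the helper used inside sync_dataframe_to_sqlite_database.
        if pd.api.types.is_integer_dtype(dtype):
            return "INTEGER"
        if pd.api.types.is_float_dtype(dtype):
            return "REAL"
        return "TEXT"

    df = pd.DataFrame({"id": [1, 2], "score": [0.5, 0.7], "name": ["a", "b"]})
    new_table_name = "scores"
    columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes))
    create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
    print(create_table_query)  # CREATE TABLE "scores" ("id" INTEGER, "score" REAL, "name" TEXT)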
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
 rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
-rgwfuncs/df_lib.py,sha256=
+rgwfuncs/df_lib.py,sha256=XhqHYcrXGEOOqB4Z0Y-ASViy6_R_Df5f7ZGh66RIP6w,68420
 rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
 rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
 rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
+rgwfuncs-0.0.60.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
+rgwfuncs-0.0.60.dist-info/METADATA,sha256=JareUKefKcxdm0rXKzudN9zjw9ljII5o3Llx7o1JrcA,58951
+rgwfuncs-0.0.60.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+rgwfuncs-0.0.60.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
+rgwfuncs-0.0.60.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
+rgwfuncs-0.0.60.dist-info/RECORD,,
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/LICENSE
File without changes
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/WHEEL
File without changes
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/entry_points.txt
File without changes
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.60.dist-info}/top_level.txt
File without changes