rgwfuncs 0.0.59__py3-none-any.whl → 0.0.61__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- rgwfuncs/df_lib.py +16 -43
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/METADATA +1 -1
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/RECORD +7 -7
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
                 top_n_values = frequency.nlargest(n)
                 report[column] = {str(value): str(count)
                                   for value, count in top_n_values.items()}
-                print(
-                    f"Top {n} unique values for column '{column}':\n{
-                        json.dumps(
-                            report[column],
-                            indent=2)}\n")
+                print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
     else:
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
                 report[column] = {
                     str(value): str(count) for value,
                     count in bottom_n_values.items()}
-                print(
-                    f"Bottom {n} unique values for column '{column}':\n{
-                        json.dumps(
-                            report[column],
-                            indent=2)}\n")
+                print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
     else:
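Both hunks above only collapse an over-wrapped print call into a single line; behaviour is unchanged. For context, a minimal usage sketch of the two reporting helpers follows. The sample DataFrame and column name are illustrative, the import path is assumed, and bottom_n_unique_values is assumed to mirror the top_n signature:

```python
import pandas as pd
from rgwfuncs.df_lib import top_n_unique_values, bottom_n_unique_values  # assumed import path

# Illustrative frame; the column name is made up for this example.
df = pd.DataFrame({"city": ["Pune", "Pune", "Mumbai", "Delhi", "Delhi", "Delhi"]})

top_n_unique_values(df, 2, ["city"])     # prints the 2 most frequent values as a JSON report
bottom_n_unique_values(df, 2, ["city"])  # prints the 2 least frequent values
```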
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
     - ValueError: If the DataFrame is `None`.
     """
     if df is not None:
-        memory_usage = df.memory_usage(deep=True).sum(
-        ) / (1024 * 1024)  # Convert bytes to MB
+        memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024)  # Convert bytes to MB
         print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
     else:
         raise ValueError("No DataFrame to print. Please provide a DataFrame.")
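The change is purely cosmetic: the bytes-to-MB conversion now sits on one line. The same calculation, stand-alone, for reference (the sample DataFrame is illustrative):

```python
import pandas as pd

df = pd.DataFrame({"a": range(1000), "b": ["x"] * 1000})   # illustrative data

# Same computation as print_memory_usage: total deep memory in bytes, converted to MB.
memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024)
print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
```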
@@ -900,8 +891,7 @@ def send_dataframe_via_telegram(
             'caption': message or ''}
         files = {'document': file}
         response = requests.post(
-            f"https://api.telegram.org/bot{
-                bot_config['bot_token']}/sendDocument",
+            f"https://api.telegram.org/bot{bot_config['bot_token']}/sendDocument",
             data=payload,
             files=files)
         if remove_after_send and os.path.exists(file_name):
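Here the Telegram Bot API URL is rebuilt as a single f-string. Below is a stand-alone sketch of the same sendDocument call with the requests library; the token, chat id, and file name are placeholders rather than the library's bot_config preset:

```python
import requests

# Placeholder values; send_dataframe_via_telegram reads the real token from its bot_config.
bot_token = "123456:ABC-placeholder"
chat_id = "@example_channel"

with open("report.csv", "rb") as file:   # illustrative file name
    response = requests.post(
        f"https://api.telegram.org/bot{bot_token}/sendDocument",
        data={"chat_id": chat_id, "caption": "Daily report"},
        files={"document": file},
    )
response.raise_for_status()
```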
@@ -1010,8 +1000,7 @@ def send_data_to_email(
         encoders.encode_base64(part)
         part.add_header(
             'Content-Disposition',
-            f'attachment; filename={
-                os.path.basename(tmp_file_name)}')
+            f'attachment; filename={os.path.basename(tmp_file_name)}')
         message.attach(part)

         if remove_after_send and os.path.exists(tmp_file_name):
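The attachment header is now a single f-string. A minimal sketch of the same Content-Disposition construction with the standard-library email package; the temp-file path and payload bytes are placeholders:

```python
import os
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart

tmp_file_name = "/tmp/data_export.csv"   # hypothetical temp-file path

message = MIMEMultipart()
part = MIMEBase("application", "octet-stream")
part.set_payload(b"col1,col2\n1,2\n")    # stand-in for the real file contents
encoders.encode_base64(part)
part.add_header(
    "Content-Disposition",
    f"attachment; filename={os.path.basename(tmp_file_name)}")
message.attach(part)

print(part["Content-Disposition"])   # attachment; filename=data_export.csv
```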
@@ -1234,9 +1223,7 @@ def append_ranged_classification_column(
         for r in range_list
     )

-    labels = [f"{pad_number(range_list[i],
-        max_integer_length)} to {pad_number(range_list[i + 1],
-        max_integer_length)}" for i in range(len(range_list) - 1)]
+    labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]

     # Ensure the target column is numeric
     df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
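The label comprehension is joined onto one line. pad_number is an internal helper of append_ranged_classification_column that is not shown in this diff; the sketch below reconstructs the labelling step with str.zfill as a stand-in, assuming pad_number zero-pads each bound to max_integer_length:

```python
range_list = [0, 10, 100, 1000]                                # illustrative range bounds
max_integer_length = max(len(str(r)) for r in range_list)      # 4

labels = [
    f"{str(range_list[i]).zfill(max_integer_length)} to "
    f"{str(range_list[i + 1]).zfill(max_integer_length)}"
    for i in range(len(range_list) - 1)
]
print(labels)   # ['0000 to 0010', '0010 to 0100', '0100 to 1000']
```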
@@ -1379,8 +1366,7 @@ def rename_columns(df: pd.DataFrame,
         A new DataFrame with columns renamed.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to rename columns. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")

     return df.rename(columns=rename_pairs)

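Only the guard clause changes here. A usage sketch of rename_columns follows; the frame and rename mapping are illustrative and the import path is assumed:

```python
import pandas as pd
from rgwfuncs.df_lib import rename_columns   # assumed import path

df = pd.DataFrame({"old_a": [1, 2], "old_b": [3, 4]})        # illustrative frame
renamed = rename_columns(df, {"old_a": "a", "old_b": "b"})
print(list(renamed.columns))   # ['a', 'b']; the input frame is left untouched
```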
@@ -1398,8 +1384,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
         A new DataFrame sorted by specified columns.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to sort. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")

     col_names = []
     asc_order = []
@@ -1434,8 +1419,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         A new DataFrame with XGB_TYPE labels appended.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to add labels. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")

     ratios = list(map(int, ratio_str.split(':')))
     total_ratio = sum(ratios)
@@ -1452,8 +1436,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
             validate_rows + ['TEST'] * test_rows
     else:
-        raise ValueError(
-            "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
+        raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")

     df_with_labels = df.copy()
     df_with_labels['XGB_TYPE'] = labels
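Same single-line treatment for the invalid-ratio error. A usage sketch of append_xgb_labels with a 'TRAIN:VALIDATE:TEST' ratio string; the data are illustrative, the import path is assumed, and the exact rounding of the split is not visible in this hunk, so the counts in the comment are approximate:

```python
import pandas as pd
from rgwfuncs.df_lib import append_xgb_labels   # assumed import path

df = pd.DataFrame({"feature": range(10)})        # illustrative data
labelled = append_xgb_labels(df, "70:20:10")     # TRAIN:VALIDATE:TEST
print(labelled["XGB_TYPE"].value_counts())       # roughly 7 TRAIN, 2 VALIDATE, 1 TEST
```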
@@ -1483,8 +1466,7 @@ def append_xgb_regression_predictions(
         DataFrame with predictions appended.
     """
     if df is None or 'XGB_TYPE' not in df.columns:
-        raise ValueError(
-            "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+        raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")

     features = feature_cols.replace(' ', '').split(',')

@@ -1558,8 +1540,7 @@ def append_xgb_logistic_regression_predictions(
         DataFrame with predictions appended.
     """
     if df is None or 'XGB_TYPE' not in df.columns:
-        raise ValueError(
-            "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+        raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")

     features = feature_cols.replace(' ', '').split(',')

@@ -1603,8 +1584,7 @@ def append_xgb_logistic_regression_predictions(
     if model_path:
         model.save_model(model_path)

-    columns_order = [col for col in df.columns if col not in [
-        'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
+    columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
     df = df[columns_order]

     return df
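The column-reordering comprehension is joined onto one line. The idiom in isolation, with hypothetical stand-ins for the target_col and pred_col arguments:

```python
import pandas as pd

# Hypothetical stand-ins for the function's target_col / pred_col arguments.
target_col, pred_col = "label", "label_pred"

df = pd.DataFrame(columns=["f1", "XGB_TYPE", "f2", "label", "label_pred"])

# Same idiom as the new single-line version: keep the remaining columns in order,
# then append the bookkeeping columns at the end.
columns_order = ([col for col in df.columns if col not in ["XGB_TYPE", target_col, pred_col]]
                 + ["XGB_TYPE", target_col, pred_col])
df = df[columns_order]
print(list(df.columns))   # ['f1', 'f2', 'XGB_TYPE', 'label', 'label_pred']
```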
@@ -1852,8 +1832,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
-            "Both DataFrames must have the same columns for a union join")
+        raise ValueError("Both DataFrames must have the same columns for a union join")

     result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
     return result_df
@@ -1874,8 +1853,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
-            "Both DataFrames must have the same columns for a bag union join")
+        raise ValueError("Both DataFrames must have the same columns for a bag union join")

     result_df = pd.concat([df1, df2], ignore_index=True)
     return result_df
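union_join and bag_union_join differ only in the trailing drop_duplicates(). The same pandas calls stand-alone, on two illustrative frames:

```python
import pandas as pd

df1 = pd.DataFrame({"id": [1, 2], "val": ["a", "b"]})   # illustrative frames
df2 = pd.DataFrame({"id": [2, 3], "val": ["b", "c"]})

union = pd.concat([df1, df2], ignore_index=True).drop_duplicates()   # union_join semantics
bag_union = pd.concat([df1, df2], ignore_index=True)                 # bag_union_join semantics

print(len(union), len(bag_union))   # 3 4
```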
@@ -2024,12 +2002,7 @@ def sync_dataframe_to_sqlite_database(
         cursor.execute(f"PRAGMA table_info({new_table_name})")
         if cursor.fetchall() == []:  # Table does not exist
             # Create a table using the DataFrame's column names and types
-            columns_with_types = ', '.join(
-                f'"{col}" {
-                    map_dtype(dtype)}' for col,
-                dtype in zip(
-                    df.columns,
-                    df.dtypes))
+            columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
             create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
             conn.execute(create_table_query)

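The CREATE TABLE column list is now built in a single join call. map_dtype is an internal helper not shown in this diff; the sketch below uses a simplified stand-in mapping and an in-memory SQLite database to show what the generated statement looks like:

```python
import sqlite3
import pandas as pd

def map_dtype(dtype) -> str:
    # Simplified stand-in for the library's internal map_dtype helper;
    # the real mapping may differ. Non-numeric dtypes fall back to TEXT.
    if pd.api.types.is_integer_dtype(dtype):
        return "INTEGER"
    if pd.api.types.is_float_dtype(dtype):
        return "REAL"
    return "TEXT"

df = pd.DataFrame({"id": [1, 2], "price": [9.5, 3.2], "name": ["a", "b"]})
new_table_name = "items"   # illustrative table name

columns_with_types = ', '.join(
    f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes))
create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
print(create_table_query)   # CREATE TABLE "items" ("id" INTEGER, "price" REAL, "name" TEXT)

with sqlite3.connect(":memory:") as conn:
    conn.execute(create_table_query)
    df.to_sql(new_table_name, conn, if_exists="append", index=False)
```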
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
 rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
-rgwfuncs/df_lib.py,sha256=
+rgwfuncs/df_lib.py,sha256=xjacr7JrjR8gZ1xqqOVvevFzK9N646wm5w1YEIxbGsE,68370
 rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
 rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
 rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
-rgwfuncs-0.0.
+rgwfuncs-0.0.61.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
+rgwfuncs-0.0.61.dist-info/METADATA,sha256=UcHNBUomOeWcpes0FL2qZysH37TtOPPR58DofFphl4o,58951
+rgwfuncs-0.0.61.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+rgwfuncs-0.0.61.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
+rgwfuncs-0.0.61.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
+rgwfuncs-0.0.61.dist-info/RECORD,,
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/LICENSE: file without changes
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/WHEEL: file without changes
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/entry_points.txt: file without changes
{rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/top_level.txt: file without changes