rgwfuncs 0.0.59__py3-none-any.whl → 0.0.61__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- rgwfuncs/df_lib.py +16 -43
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/METADATA +1 -1
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/RECORD +7 -7
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.59.dist-info → rgwfuncs-0.0.61.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -643,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
|
|
643
643
|
top_n_values = frequency.nlargest(n)
|
644
644
|
report[column] = {str(value): str(count)
|
645
645
|
for value, count in top_n_values.items()}
|
646
|
-
print(
|
647
|
-
f"Top {n} unique values for column '{column}':\n{
|
648
|
-
json.dumps(
|
649
|
-
report[column],
|
650
|
-
indent=2)}\n")
|
646
|
+
print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
|
651
647
|
else:
|
652
648
|
print(f"Column '{column}' does not exist in the DataFrame.")
|
653
649
|
else:
|
@@ -690,11 +686,7 @@ def bottom_n_unique_values(
|
|
690
686
|
report[column] = {
|
691
687
|
str(value): str(count) for value,
|
692
688
|
count in bottom_n_values.items()}
|
693
|
-
print(
|
694
|
-
f"Bottom {n} unique values for column '{column}':\n{
|
695
|
-
json.dumps(
|
696
|
-
report[column],
|
697
|
-
indent=2)}\n")
|
689
|
+
print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
|
698
690
|
else:
|
699
691
|
print(f"Column '{column}' does not exist in the DataFrame.")
|
700
692
|
else:
|
@@ -753,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
|
|
753
745
|
- ValueError: If the DataFrame is `None`.
|
754
746
|
"""
|
755
747
|
if df is not None:
|
756
|
-
memory_usage = df.memory_usage(deep=True).sum(
|
757
|
-
) / (1024 * 1024) # Convert bytes to MB
|
748
|
+
memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
|
758
749
|
print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
|
759
750
|
else:
|
760
751
|
raise ValueError("No DataFrame to print. Please provide a DataFrame.")
|
@@ -900,8 +891,7 @@ def send_dataframe_via_telegram(
|
|
900
891
|
'caption': message or ''}
|
901
892
|
files = {'document': file}
|
902
893
|
response = requests.post(
|
903
|
-
f"https://api.telegram.org/bot{
|
904
|
-
bot_config['bot_token']}/sendDocument",
|
894
|
+
f"https://api.telegram.org/bot{bot_config['bot_token']}/sendDocument",
|
905
895
|
data=payload,
|
906
896
|
files=files)
|
907
897
|
if remove_after_send and os.path.exists(file_name):
|
@@ -1010,8 +1000,7 @@ def send_data_to_email(
|
|
1010
1000
|
encoders.encode_base64(part)
|
1011
1001
|
part.add_header(
|
1012
1002
|
'Content-Disposition',
|
1013
|
-
f'attachment; filename={
|
1014
|
-
os.path.basename(tmp_file_name)}')
|
1003
|
+
f'attachment; filename={os.path.basename(tmp_file_name)}')
|
1015
1004
|
message.attach(part)
|
1016
1005
|
|
1017
1006
|
if remove_after_send and os.path.exists(tmp_file_name):
|
@@ -1234,9 +1223,7 @@ def append_ranged_classification_column(
|
|
1234
1223
|
for r in range_list
|
1235
1224
|
)
|
1236
1225
|
|
1237
|
-
labels = [f"{pad_number(range_list[i],
|
1238
|
-
max_integer_length)} to {pad_number(range_list[i + 1],
|
1239
|
-
max_integer_length)}" for i in range(len(range_list) - 1)]
|
1226
|
+
labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
|
1240
1227
|
|
1241
1228
|
# Ensure the target column is numeric
|
1242
1229
|
df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
|
@@ -1379,8 +1366,7 @@ def rename_columns(df: pd.DataFrame,
|
|
1379
1366
|
A new DataFrame with columns renamed.
|
1380
1367
|
"""
|
1381
1368
|
if df is None:
|
1382
|
-
raise ValueError(
|
1383
|
-
"No DataFrame to rename columns. Please provide a valid DataFrame.")
|
1369
|
+
raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
|
1384
1370
|
|
1385
1371
|
return df.rename(columns=rename_pairs)
|
1386
1372
|
|
@@ -1398,8 +1384,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
|
|
1398
1384
|
A new DataFrame sorted by specified columns.
|
1399
1385
|
"""
|
1400
1386
|
if df is None:
|
1401
|
-
raise ValueError(
|
1402
|
-
"No DataFrame to sort. Please provide a valid DataFrame.")
|
1387
|
+
raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
|
1403
1388
|
|
1404
1389
|
col_names = []
|
1405
1390
|
asc_order = []
|
@@ -1434,8 +1419,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
|
|
1434
1419
|
A new DataFrame with XGB_TYPE labels appended.
|
1435
1420
|
"""
|
1436
1421
|
if df is None:
|
1437
|
-
raise ValueError(
|
1438
|
-
"No DataFrame to add labels. Please provide a valid DataFrame.")
|
1422
|
+
raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
|
1439
1423
|
|
1440
1424
|
ratios = list(map(int, ratio_str.split(':')))
|
1441
1425
|
total_ratio = sum(ratios)
|
@@ -1452,8 +1436,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
|
|
1452
1436
|
labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
|
1453
1437
|
validate_rows + ['TEST'] * test_rows
|
1454
1438
|
else:
|
1455
|
-
raise ValueError(
|
1456
|
-
"Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
|
1439
|
+
raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
|
1457
1440
|
|
1458
1441
|
df_with_labels = df.copy()
|
1459
1442
|
df_with_labels['XGB_TYPE'] = labels
|
@@ -1483,8 +1466,7 @@ def append_xgb_regression_predictions(
|
|
1483
1466
|
DataFrame with predictions appended.
|
1484
1467
|
"""
|
1485
1468
|
if df is None or 'XGB_TYPE' not in df.columns:
|
1486
|
-
raise ValueError(
|
1487
|
-
"DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1469
|
+
raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1488
1470
|
|
1489
1471
|
features = feature_cols.replace(' ', '').split(',')
|
1490
1472
|
|
@@ -1558,8 +1540,7 @@ def append_xgb_logistic_regression_predictions(
|
|
1558
1540
|
DataFrame with predictions appended.
|
1559
1541
|
"""
|
1560
1542
|
if df is None or 'XGB_TYPE' not in df.columns:
|
1561
|
-
raise ValueError(
|
1562
|
-
"DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1543
|
+
raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1563
1544
|
|
1564
1545
|
features = feature_cols.replace(' ', '').split(',')
|
1565
1546
|
|
@@ -1603,8 +1584,7 @@ def append_xgb_logistic_regression_predictions(
|
|
1603
1584
|
if model_path:
|
1604
1585
|
model.save_model(model_path)
|
1605
1586
|
|
1606
|
-
columns_order = [col for col in df.columns if col not in [
|
1607
|
-
'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
|
1587
|
+
columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
|
1608
1588
|
df = df[columns_order]
|
1609
1589
|
|
1610
1590
|
return df
|
@@ -1852,8 +1832,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1852
1832
|
ValueError: If the DataFrames do not have the same columns.
|
1853
1833
|
"""
|
1854
1834
|
if set(df1.columns) != set(df2.columns):
|
1855
|
-
raise ValueError(
|
1856
|
-
"Both DataFrames must have the same columns for a union join")
|
1835
|
+
raise ValueError("Both DataFrames must have the same columns for a union join")
|
1857
1836
|
|
1858
1837
|
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1859
1838
|
return result_df
|
@@ -1874,8 +1853,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1874
1853
|
ValueError: If the DataFrames do not have the same columns.
|
1875
1854
|
"""
|
1876
1855
|
if set(df1.columns) != set(df2.columns):
|
1877
|
-
raise ValueError(
|
1878
|
-
"Both DataFrames must have the same columns for a bag union join")
|
1856
|
+
raise ValueError("Both DataFrames must have the same columns for a bag union join")
|
1879
1857
|
|
1880
1858
|
result_df = pd.concat([df1, df2], ignore_index=True)
|
1881
1859
|
return result_df
|
@@ -2024,12 +2002,7 @@ def sync_dataframe_to_sqlite_database(
|
|
2024
2002
|
cursor.execute(f"PRAGMA table_info({new_table_name})")
|
2025
2003
|
if cursor.fetchall() == []: # Table does not exist
|
2026
2004
|
# Create a table using the DataFrame's column names and types
|
2027
|
-
columns_with_types = ', '.join(
|
2028
|
-
f'"{col}" {
|
2029
|
-
map_dtype(dtype)}' for col,
|
2030
|
-
dtype in zip(
|
2031
|
-
df.columns,
|
2032
|
-
df.dtypes))
|
2005
|
+
columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
|
2033
2006
|
create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
|
2034
2007
|
conn.execute(create_table_query)
|
2035
2008
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
|
2
2
|
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=
|
3
|
+
rgwfuncs/df_lib.py,sha256=xjacr7JrjR8gZ1xqqOVvevFzK9N646wm5w1YEIxbGsE,68370
|
4
4
|
rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
|
5
5
|
rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
|
6
6
|
rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
|
7
|
-
rgwfuncs-0.0.
|
8
|
-
rgwfuncs-0.0.
|
9
|
-
rgwfuncs-0.0.
|
10
|
-
rgwfuncs-0.0.
|
11
|
-
rgwfuncs-0.0.
|
12
|
-
rgwfuncs-0.0.
|
7
|
+
rgwfuncs-0.0.61.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.61.dist-info/METADATA,sha256=UcHNBUomOeWcpes0FL2qZysH37TtOPPR58DofFphl4o,58951
|
9
|
+
rgwfuncs-0.0.61.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
+
rgwfuncs-0.0.61.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.61.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.61.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|