rgwfuncs 0.0.58__tar.gz → 0.0.60__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {rgwfuncs-0.0.58/src/rgwfuncs.egg-info → rgwfuncs-0.0.60}/PKG-INFO +1 -1
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/pyproject.toml +1 -1
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/setup.cfg +1 -1
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs/df_lib.py +15 -42
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60/src/rgwfuncs.egg-info}/PKG-INFO +1 -1
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/LICENSE +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/README.md +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs/__init__.py +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs/algebra_lib.py +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs/docs_lib.py +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs/interactive_shell_lib.py +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs/str_lib.py +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs.egg-info/SOURCES.txt +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs.egg-info/dependency_links.txt +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs.egg-info/entry_points.txt +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs.egg-info/requires.txt +0 -0
- {rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs.egg-info/top_level.txt +0 -0
@@ -212,9 +212,7 @@ def update_rows(
|
|
212
212
|
|
213
213
|
invalid_cols = [col for col in updates if col not in df.columns]
|
214
214
|
if invalid_cols:
|
215
|
-
raise ValueError(
|
216
|
-
f"Columns {
|
217
|
-
', '.join(invalid_cols)} do not exist in the DataFrame.")
|
215
|
+
raise ValueError(f"Columns {', '.join(invalid_cols)} do not exist in the DataFrame.")
|
218
216
|
|
219
217
|
new_df = df.copy()
|
220
218
|
for col_name, new_value in updates.items():
|
@@ -645,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
|
|
645
643
|
top_n_values = frequency.nlargest(n)
|
646
644
|
report[column] = {str(value): str(count)
|
647
645
|
for value, count in top_n_values.items()}
|
648
|
-
print(
|
649
|
-
f"Top {n} unique values for column '{column}':\n{
|
650
|
-
json.dumps(
|
651
|
-
report[column],
|
652
|
-
indent=2)}\n")
|
646
|
+
print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
|
653
647
|
else:
|
654
648
|
print(f"Column '{column}' does not exist in the DataFrame.")
|
655
649
|
else:
|
@@ -692,11 +686,7 @@ def bottom_n_unique_values(
|
|
692
686
|
report[column] = {
|
693
687
|
str(value): str(count) for value,
|
694
688
|
count in bottom_n_values.items()}
|
695
|
-
print(
|
696
|
-
f"Bottom {n} unique values for column '{column}':\n{
|
697
|
-
json.dumps(
|
698
|
-
report[column],
|
699
|
-
indent=2)}\n")
|
689
|
+
print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
|
700
690
|
else:
|
701
691
|
print(f"Column '{column}' does not exist in the DataFrame.")
|
702
692
|
else:
|
@@ -755,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
|
|
755
745
|
- ValueError: If the DataFrame is `None`.
|
756
746
|
"""
|
757
747
|
if df is not None:
|
758
|
-
memory_usage = df.memory_usage(deep=True).sum(
|
759
|
-
) / (1024 * 1024) # Convert bytes to MB
|
748
|
+
memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024) # Convert bytes to MB
|
760
749
|
print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
|
761
750
|
else:
|
762
751
|
raise ValueError("No DataFrame to print. Please provide a DataFrame.")
|
@@ -1236,9 +1225,7 @@ def append_ranged_classification_column(
|
|
1236
1225
|
for r in range_list
|
1237
1226
|
)
|
1238
1227
|
|
1239
|
-
labels = [f"{pad_number(range_list[i],
|
1240
|
-
max_integer_length)} to {pad_number(range_list[i + 1],
|
1241
|
-
max_integer_length)}" for i in range(len(range_list) - 1)]
|
1228
|
+
labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
|
1242
1229
|
|
1243
1230
|
# Ensure the target column is numeric
|
1244
1231
|
df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
|
@@ -1381,8 +1368,7 @@ def rename_columns(df: pd.DataFrame,
|
|
1381
1368
|
A new DataFrame with columns renamed.
|
1382
1369
|
"""
|
1383
1370
|
if df is None:
|
1384
|
-
raise ValueError(
|
1385
|
-
"No DataFrame to rename columns. Please provide a valid DataFrame.")
|
1371
|
+
raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
|
1386
1372
|
|
1387
1373
|
return df.rename(columns=rename_pairs)
|
1388
1374
|
|
@@ -1400,8 +1386,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
|
|
1400
1386
|
A new DataFrame sorted by specified columns.
|
1401
1387
|
"""
|
1402
1388
|
if df is None:
|
1403
|
-
raise ValueError(
|
1404
|
-
"No DataFrame to sort. Please provide a valid DataFrame.")
|
1389
|
+
raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
|
1405
1390
|
|
1406
1391
|
col_names = []
|
1407
1392
|
asc_order = []
|
@@ -1436,8 +1421,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
|
|
1436
1421
|
A new DataFrame with XGB_TYPE labels appended.
|
1437
1422
|
"""
|
1438
1423
|
if df is None:
|
1439
|
-
raise ValueError(
|
1440
|
-
"No DataFrame to add labels. Please provide a valid DataFrame.")
|
1424
|
+
raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
|
1441
1425
|
|
1442
1426
|
ratios = list(map(int, ratio_str.split(':')))
|
1443
1427
|
total_ratio = sum(ratios)
|
@@ -1454,8 +1438,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
|
|
1454
1438
|
labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
|
1455
1439
|
validate_rows + ['TEST'] * test_rows
|
1456
1440
|
else:
|
1457
|
-
raise ValueError(
|
1458
|
-
"Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
|
1441
|
+
raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
|
1459
1442
|
|
1460
1443
|
df_with_labels = df.copy()
|
1461
1444
|
df_with_labels['XGB_TYPE'] = labels
|
@@ -1485,8 +1468,7 @@ def append_xgb_regression_predictions(
|
|
1485
1468
|
DataFrame with predictions appended.
|
1486
1469
|
"""
|
1487
1470
|
if df is None or 'XGB_TYPE' not in df.columns:
|
1488
|
-
raise ValueError(
|
1489
|
-
"DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1471
|
+
raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1490
1472
|
|
1491
1473
|
features = feature_cols.replace(' ', '').split(',')
|
1492
1474
|
|
@@ -1560,8 +1542,7 @@ def append_xgb_logistic_regression_predictions(
|
|
1560
1542
|
DataFrame with predictions appended.
|
1561
1543
|
"""
|
1562
1544
|
if df is None or 'XGB_TYPE' not in df.columns:
|
1563
|
-
raise ValueError(
|
1564
|
-
"DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1545
|
+
raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
|
1565
1546
|
|
1566
1547
|
features = feature_cols.replace(' ', '').split(',')
|
1567
1548
|
|
@@ -1605,8 +1586,7 @@ def append_xgb_logistic_regression_predictions(
|
|
1605
1586
|
if model_path:
|
1606
1587
|
model.save_model(model_path)
|
1607
1588
|
|
1608
|
-
columns_order = [col for col in df.columns if col not in [
|
1609
|
-
'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
|
1589
|
+
columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
|
1610
1590
|
df = df[columns_order]
|
1611
1591
|
|
1612
1592
|
return df
|
@@ -1854,8 +1834,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1854
1834
|
ValueError: If the DataFrames do not have the same columns.
|
1855
1835
|
"""
|
1856
1836
|
if set(df1.columns) != set(df2.columns):
|
1857
|
-
raise ValueError(
|
1858
|
-
"Both DataFrames must have the same columns for a union join")
|
1837
|
+
raise ValueError("Both DataFrames must have the same columns for a union join")
|
1859
1838
|
|
1860
1839
|
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1861
1840
|
return result_df
|
@@ -1876,8 +1855,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1876
1855
|
ValueError: If the DataFrames do not have the same columns.
|
1877
1856
|
"""
|
1878
1857
|
if set(df1.columns) != set(df2.columns):
|
1879
|
-
raise ValueError(
|
1880
|
-
"Both DataFrames must have the same columns for a bag union join")
|
1858
|
+
raise ValueError("Both DataFrames must have the same columns for a bag union join")
|
1881
1859
|
|
1882
1860
|
result_df = pd.concat([df1, df2], ignore_index=True)
|
1883
1861
|
return result_df
|
@@ -2026,12 +2004,7 @@ def sync_dataframe_to_sqlite_database(
|
|
2026
2004
|
cursor.execute(f"PRAGMA table_info({new_table_name})")
|
2027
2005
|
if cursor.fetchall() == []: # Table does not exist
|
2028
2006
|
# Create a table using the DataFrame's column names and types
|
2029
|
-
columns_with_types = ', '.join(
|
2030
|
-
f'"{col}" {
|
2031
|
-
map_dtype(dtype)}' for col,
|
2032
|
-
dtype in zip(
|
2033
|
-
df.columns,
|
2034
|
-
df.dtypes))
|
2007
|
+
columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
|
2035
2008
|
create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
|
2036
2009
|
conn.execute(create_table_query)
|
2037
2010
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|