rgwfuncs 0.0.91__py3-none-any.whl → 0.0.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +62 -70
- rgwfuncs/str_lib.py +0 -2
- {rgwfuncs-0.0.91.dist-info → rgwfuncs-0.0.93.dist-info}/METADATA +13 -14
- rgwfuncs-0.0.93.dist-info/RECORD +12 -0
- {rgwfuncs-0.0.91.dist-info → rgwfuncs-0.0.93.dist-info}/WHEEL +1 -1
- rgwfuncs-0.0.91.dist-info/RECORD +0 -12
- {rgwfuncs-0.0.91.dist-info → rgwfuncs-0.0.93.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.91.dist-info → rgwfuncs-0.0.93.dist-info}/licenses/LICENSE +0 -0
- {rgwfuncs-0.0.91.dist-info → rgwfuncs-0.0.93.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -911,7 +911,6 @@ def send_dataframe_via_telegram(
|
|
911
911
|
else:
|
912
912
|
raise ValueError("Config must be either a path string or a dictionary")
|
913
913
|
|
914
|
-
|
915
914
|
config = get_config(config)
|
916
915
|
|
917
916
|
bot_config = next(
|
@@ -1232,17 +1231,13 @@ def order_columns(df: pd.DataFrame, column_order_str: str) -> pd.DataFrame:
|
|
1232
1231
|
return df[new_order]
|
1233
1232
|
|
1234
1233
|
|
1235
|
-
def append_ranged_classification_column(
|
1236
|
-
df: pd.DataFrame,
|
1237
|
-
ranges: str,
|
1238
|
-
target_col: str,
|
1239
|
-
new_col_name: str) -> pd.DataFrame:
|
1234
|
+
def append_ranged_classification_column(df: pd.DataFrame, ranges: List[Union[int, float]], target_col: str, new_col_name: str) -> pd.DataFrame:
|
1240
1235
|
"""
|
1241
1236
|
Append a ranged classification column to the DataFrame.
|
1242
1237
|
|
1243
1238
|
Parameters:
|
1244
1239
|
df: The DataFrame to modify.
|
1245
|
-
ranges: A
|
1240
|
+
ranges: A list of numeric range boundaries (integers or floats, last bin extends to infinity).
|
1246
1241
|
target_col: The column to analyze.
|
1247
1242
|
new_col_name: The name of the new classification column.
|
1248
1243
|
|
@@ -1259,52 +1254,55 @@ def append_ranged_classification_column(
|
|
1259
1254
|
else:
|
1260
1255
|
return str(int(number)).zfill(integer_length)
|
1261
1256
|
|
1262
|
-
|
1263
|
-
has_decimals = any(
|
1257
|
+
# Check if any numbers in ranges are decimals
|
1258
|
+
has_decimals = any(isinstance(r, float) and r % 1 != 0 for r in ranges)
|
1264
1259
|
|
1265
1260
|
if has_decimals:
|
1266
|
-
range_list = [float(r) for r in
|
1261
|
+
range_list = [float(r) for r in ranges] + [float('inf')]
|
1267
1262
|
|
1268
1263
|
max_decimal_length = max(
|
1269
|
-
len(str(r).split('.')[1])
|
1270
|
-
for r in
|
1271
|
-
if '.' in str(r)
|
1264
|
+
len(str(r).split('.')[1]) if isinstance(r, float) and r % 1 != 0 else 0
|
1265
|
+
for r in ranges
|
1272
1266
|
)
|
1273
1267
|
|
1274
1268
|
max_integer_length = max(
|
1275
1269
|
len(str(int(float(r))))
|
1276
|
-
for r in
|
1270
|
+
for r in ranges
|
1277
1271
|
)
|
1278
1272
|
|
1279
1273
|
labels = []
|
1280
|
-
|
1281
|
-
for i in range(len(range_list) - 1):
|
1274
|
+
for i in range(len(ranges)):
|
1282
1275
|
start = pad_number(
|
1283
|
-
|
1284
|
-
max_integer_length,
|
1285
|
-
max_decimal_length,
|
1286
|
-
decimal=True
|
1287
|
-
)
|
1288
|
-
|
1289
|
-
end = pad_number(
|
1290
|
-
range_list[i + 1],
|
1276
|
+
ranges[i],
|
1291
1277
|
max_integer_length,
|
1292
1278
|
max_decimal_length,
|
1293
1279
|
decimal=True
|
1294
1280
|
)
|
1295
|
-
|
1296
|
-
|
1281
|
+
if i == len(ranges) - 1:
|
1282
|
+
label = f"{start}+"
|
1283
|
+
else:
|
1284
|
+
end = pad_number(
|
1285
|
+
ranges[i + 1],
|
1286
|
+
max_integer_length,
|
1287
|
+
max_decimal_length,
|
1288
|
+
decimal=True
|
1289
|
+
)
|
1290
|
+
label = f"{start} - {end}"
|
1297
1291
|
labels.append(label)
|
1298
1292
|
|
1299
1293
|
else:
|
1300
|
-
range_list = [int(r) for r in
|
1294
|
+
range_list = [int(r) for r in ranges] + [float('inf')]
|
1295
|
+
max_integer_length = max(len(str(int(r))) for r in ranges)
|
1301
1296
|
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1297
|
+
labels = []
|
1298
|
+
for i in range(len(ranges)):
|
1299
|
+
start = pad_number(ranges[i], max_integer_length)
|
1300
|
+
if i == len(ranges) - 1:
|
1301
|
+
label = f"{start}+"
|
1302
|
+
else:
|
1303
|
+
end = pad_number(ranges[i + 1], max_integer_length)
|
1304
|
+
label = f"{start} - {end}"
|
1305
|
+
labels.append(label)
|
1308
1306
|
|
1309
1307
|
# Ensure the target column is numeric
|
1310
1308
|
df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
|
@@ -1313,22 +1311,19 @@ def append_ranged_classification_column(
|
|
1313
1311
|
bins=range_list,
|
1314
1312
|
labels=labels,
|
1315
1313
|
right=False,
|
1316
|
-
include_lowest=True
|
1314
|
+
include_lowest=True
|
1315
|
+
)
|
1317
1316
|
|
1318
1317
|
return df
|
1319
1318
|
|
1320
1319
|
|
1321
|
-
def append_percentile_classification_column(
|
1322
|
-
df: pd.DataFrame,
|
1323
|
-
percentiles: str,
|
1324
|
-
target_col: str,
|
1325
|
-
new_col_name: str) -> pd.DataFrame:
|
1320
|
+
def append_percentile_classification_column(df: pd.DataFrame, percentiles: List[Union[int, float]], target_col: str, new_col_name: str) -> pd.DataFrame:
|
1326
1321
|
"""
|
1327
1322
|
Append a percentile classification column to the DataFrame.
|
1328
1323
|
|
1329
1324
|
Parameters:
|
1330
1325
|
df: The DataFrame to modify.
|
1331
|
-
percentiles: A
|
1326
|
+
percentiles: A list of percentile values (0-100, integers or floats).
|
1332
1327
|
target_col: The column to analyze.
|
1333
1328
|
new_col_name: The name of the new classification column.
|
1334
1329
|
|
@@ -1345,40 +1340,42 @@ def append_percentile_classification_column(
|
|
1345
1340
|
else:
|
1346
1341
|
return str(int(number)).zfill(integer_length)
|
1347
1342
|
|
1348
|
-
|
1349
|
-
has_decimals = any(
|
1343
|
+
# Check if any numbers in percentiles are decimals
|
1344
|
+
has_decimals = any(isinstance(p, float) and p % 1 != 0 for p in percentiles)
|
1350
1345
|
|
1351
1346
|
if has_decimals:
|
1352
|
-
percentiles_list = [float(p) for p in
|
1353
|
-
max_decimal_length = max(
|
1354
|
-
|
1347
|
+
percentiles_list = [float(p) for p in percentiles]
|
1348
|
+
max_decimal_length = max(
|
1349
|
+
len(str(p).split('.')[1]) if isinstance(p, float) and p % 1 != 0 else 0
|
1350
|
+
for p in percentiles
|
1351
|
+
)
|
1352
|
+
max_integer_length = max(len(str(int(float(p)))) for p in percentiles)
|
1355
1353
|
|
1356
1354
|
labels = []
|
1357
|
-
|
1358
1355
|
for i in range(len(percentiles_list) - 1):
|
1359
1356
|
start = pad_number(
|
1360
1357
|
percentiles_list[i],
|
1361
1358
|
max_integer_length,
|
1362
1359
|
max_decimal_length,
|
1363
|
-
decimal=True
|
1360
|
+
decimal=True
|
1361
|
+
)
|
1364
1362
|
end = pad_number(
|
1365
|
-
percentiles_list[i + 1],
|
1366
|
-
|
1367
|
-
|
1363
|
+
percentiles_list[i + 1],
|
1364
|
+
max_integer_length,
|
1365
|
+
max_decimal_length,
|
1366
|
+
decimal=True
|
1367
|
+
)
|
1368
|
+
label = f"{start} - {end}"
|
1368
1369
|
labels.append(label)
|
1369
1370
|
else:
|
1370
|
-
percentiles_list = [int(p) for p in
|
1371
|
-
|
1371
|
+
percentiles_list = [int(p) for p in percentiles]
|
1372
1372
|
max_integer_length = max(len(str(p)) for p in percentiles_list)
|
1373
1373
|
|
1374
1374
|
labels = []
|
1375
|
-
|
1376
1375
|
for i in range(len(percentiles_list) - 1):
|
1377
1376
|
start = pad_number(percentiles_list[i], max_integer_length)
|
1378
|
-
|
1379
1377
|
end = pad_number(percentiles_list[i + 1], max_integer_length)
|
1380
|
-
|
1381
|
-
label = f"{start} to {end}"
|
1378
|
+
label = f"{start} - {end}"
|
1382
1379
|
labels.append(label)
|
1383
1380
|
|
1384
1381
|
# Ensure the target column is numeric
|
@@ -1389,22 +1386,19 @@ def append_percentile_classification_column(
|
|
1389
1386
|
df[target_col],
|
1390
1387
|
bins=quantiles,
|
1391
1388
|
labels=labels,
|
1392
|
-
include_lowest=True
|
1389
|
+
include_lowest=True
|
1390
|
+
)
|
1393
1391
|
|
1394
1392
|
return df
|
1395
1393
|
|
1396
1394
|
|
1397
|
-
def append_ranged_date_classification_column(
|
1398
|
-
df: pd.DataFrame,
|
1399
|
-
date_ranges: str,
|
1400
|
-
target_col: str,
|
1401
|
-
new_col_name: str) -> pd.DataFrame:
|
1395
|
+
def append_ranged_date_classification_column(df: pd.DataFrame, date_ranges: list[str], target_col: str, new_col_name: str) -> pd.DataFrame:
|
1402
1396
|
"""
|
1403
1397
|
Append a ranged date classification column to the DataFrame.
|
1404
1398
|
|
1405
1399
|
Parameters:
|
1406
1400
|
df: The DataFrame to modify.
|
1407
|
-
date_ranges: A
|
1401
|
+
date_ranges: A list of date strings in a format pandas can parse (e.g., ['2020-01-01', '2020-06-30', '2020-12-31']).
|
1408
1402
|
target_col: The date column to analyze.
|
1409
1403
|
new_col_name: The name of the new date classification column.
|
1410
1404
|
|
@@ -1412,28 +1406,26 @@ def append_ranged_date_classification_column(
|
|
1412
1406
|
A new DataFrame with the date classification column appended.
|
1413
1407
|
"""
|
1414
1408
|
|
1415
|
-
date_list = [pd.to_datetime(date) for date in date_ranges
|
1409
|
+
date_list = [pd.to_datetime(date) for date in date_ranges]
|
1416
1410
|
|
1417
1411
|
labels = []
|
1418
|
-
|
1419
1412
|
for i in range(len(date_list) - 1):
|
1420
1413
|
start_date = date_list[i].strftime('%Y-%m-%d')
|
1421
1414
|
end_date = date_list[i + 1].strftime('%Y-%m-%d')
|
1422
|
-
label = f"{start_date}
|
1415
|
+
label = f"{start_date} - {end_date}"
|
1423
1416
|
labels.append(label)
|
1424
1417
|
|
1425
1418
|
df[new_col_name] = pd.cut(
|
1426
1419
|
pd.to_datetime(df[target_col]),
|
1427
1420
|
bins=date_list,
|
1428
1421
|
labels=labels,
|
1429
|
-
right=False
|
1422
|
+
right=False
|
1423
|
+
)
|
1430
1424
|
|
1431
1425
|
return df
|
1432
1426
|
|
1433
1427
|
|
1434
|
-
def rename_columns(df: pd.DataFrame,
|
1435
|
-
rename_pairs: Dict[str,
|
1436
|
-
str]) -> pd.DataFrame:
|
1428
|
+
def rename_columns(df: pd.DataFrame, rename_pairs: Dict[str, str]) -> pd.DataFrame:
|
1437
1429
|
"""
|
1438
1430
|
Rename columns in the DataFrame.
|
1439
1431
|
|
rgwfuncs/str_lib.py
CHANGED
@@ -45,8 +45,6 @@ def send_telegram_message(preset_name: str, message: str, config: Optional[Union
|
|
45
45
|
else:
|
46
46
|
raise ValueError("Config must be either a path string or a dictionary")
|
47
47
|
|
48
|
-
|
49
|
-
|
50
48
|
def get_telegram_preset(config: dict, preset_name: str) -> dict:
|
51
49
|
"""Get the Telegram preset configuration."""
|
52
50
|
presets = config.get("telegram_bot_presets", [])
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.93
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -1249,7 +1249,7 @@ Append a ranged classification column to the DataFrame.
|
|
1249
1249
|
|
1250
1250
|
• Parameters:
|
1251
1251
|
- df (pd.DataFrame)
|
1252
|
-
- ranges (
|
1252
|
+
- ranges (list[int | float]): List of numeric range boundaries (e.g., [0, 10, 20, 30]), last bin extends to infinity.
|
1253
1253
|
- `target_col` (str): The column to classify.
|
1254
1254
|
- `new_col_name` (str): Name of the new classification column.
|
1255
1255
|
|
@@ -1257,14 +1257,14 @@ Append a ranged classification column to the DataFrame.
|
|
1257
1257
|
- pd.DataFrame
|
1258
1258
|
|
1259
1259
|
• Example:
|
1260
|
-
|
1260
|
+
|
1261
1261
|
from rgwfuncs import append_ranged_classification_column
|
1262
1262
|
import pandas as pd
|
1263
1263
|
|
1264
1264
|
df = pd.DataFrame({'Scores': [5, 12, 25]})
|
1265
|
-
df_classified = append_ranged_classification_column(df,
|
1265
|
+
df_classified = append_ranged_classification_column(df, [0, 10, 20, 30], 'Scores', 'ScoreRange')
|
1266
1266
|
print(df_classified)
|
1267
|
-
|
1267
|
+
|
1268
1268
|
|
1269
1269
|
--------------------------------------------------------------------------------
|
1270
1270
|
|
@@ -1273,7 +1273,7 @@ Append a percentile classification column to the DataFrame.
|
|
1273
1273
|
|
1274
1274
|
• Parameters:
|
1275
1275
|
- df (pd.DataFrame)
|
1276
|
-
- percentiles (
|
1276
|
+
- percentiles (list[int | float]): List of percentile values (0-100, e.g., [25, 50, 75]).
|
1277
1277
|
- `target_col` (str)
|
1278
1278
|
- `new_col_name` (str)
|
1279
1279
|
|
@@ -1281,14 +1281,14 @@ Append a percentile classification column to the DataFrame.
|
|
1281
1281
|
- pd.DataFrame
|
1282
1282
|
|
1283
1283
|
• Example:
|
1284
|
-
|
1284
|
+
|
1285
1285
|
from rgwfuncs import append_percentile_classification_column
|
1286
1286
|
import pandas as pd
|
1287
1287
|
|
1288
1288
|
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
|
1289
|
-
df_classified = append_percentile_classification_column(df,
|
1289
|
+
df_classified = append_percentile_classification_column(df, [25, 50, 75], 'Values', 'ValuePercentile')
|
1290
1290
|
print(df_classified)
|
1291
|
-
|
1291
|
+
|
1292
1292
|
|
1293
1293
|
--------------------------------------------------------------------------------
|
1294
1294
|
|
@@ -1297,7 +1297,7 @@ Append a ranged date classification column to the DataFrame.
|
|
1297
1297
|
|
1298
1298
|
• Parameters:
|
1299
1299
|
- df (pd.DataFrame)
|
1300
|
-
- `date_ranges` (str):
|
1300
|
+
- `date_ranges` (list[str]): List of date strings in a format pandas can parse (e.g., ['2020-01-01', '2020-06-30', '2020-12-31']).
|
1301
1301
|
- `target_col` (str)
|
1302
1302
|
- `new_col_name` (str)
|
1303
1303
|
|
@@ -1305,19 +1305,18 @@ Append a ranged date classification column to the DataFrame.
|
|
1305
1305
|
- pd.DataFrame
|
1306
1306
|
|
1307
1307
|
• Example:
|
1308
|
-
|
1308
|
+
|
1309
1309
|
from rgwfuncs import append_ranged_date_classification_column
|
1310
1310
|
import pandas as pd
|
1311
1311
|
|
1312
|
-
df = pd.DataFrame({'EventDate': pd.to_datetime(['2020-03-15','2020-08-10'])})
|
1312
|
+
df = pd.DataFrame({'EventDate': pd.to_datetime(['2020-03-15', '2020-08-10'])})
|
1313
1313
|
df_classified = append_ranged_date_classification_column(
|
1314
1314
|
df,
|
1315
|
-
'2020-01-
|
1315
|
+
['2020-01-01', '2020-06-30', '2020-12-31'],
|
1316
1316
|
'EventDate',
|
1317
1317
|
'DateRange'
|
1318
1318
|
)
|
1319
1319
|
print(df_classified)
|
1320
|
-
|
1321
1320
|
|
1322
1321
|
--------------------------------------------------------------------------------
|
1323
1322
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
|
+
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
+
rgwfuncs/df_lib.py,sha256=LHG6E-umLGVdDRWjziFrRb_YSlTronHv2QwEFBrTAt4,75528
|
4
|
+
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
5
|
+
rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
|
6
|
+
rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
|
7
|
+
rgwfuncs-0.0.93.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.93.dist-info/METADATA,sha256=eEiNMD4k_feeK6mxhrVqlvDTRONwsPosvR-pj9hYlAI,61443
|
9
|
+
rgwfuncs-0.0.93.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
10
|
+
rgwfuncs-0.0.93.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.93.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.93.dist-info/RECORD,,
|
rgwfuncs-0.0.91.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
|
-
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=coAmZ2RyWvovorNekQnLW5cUIZyY-h6s88YUm0ytFAw,75107
|
4
|
-
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
5
|
-
rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
|
6
|
-
rgwfuncs/str_lib.py,sha256=-dcJt-jE0YG-XwHcNslCM_Gp-L0Ho0zYFPxjrxepzzA,3210
|
7
|
-
rgwfuncs-0.0.91.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
-
rgwfuncs-0.0.91.dist-info/METADATA,sha256=BVQ4dPy_oI5jTtqwEBfj96OGqwIlOy8Ib57Je7Q6Tuk,61390
|
9
|
-
rgwfuncs-0.0.91.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
|
10
|
-
rgwfuncs-0.0.91.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
-
rgwfuncs-0.0.91.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
-
rgwfuncs-0.0.91.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|