rgwfuncs 0.0.91__py3-none-any.whl → 0.0.93__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/df_lib.py CHANGED
@@ -911,7 +911,6 @@ def send_dataframe_via_telegram(
911
911
  else:
912
912
  raise ValueError("Config must be either a path string or a dictionary")
913
913
 
914
-
915
914
  config = get_config(config)
916
915
 
917
916
  bot_config = next(
@@ -1232,17 +1231,13 @@ def order_columns(df: pd.DataFrame, column_order_str: str) -> pd.DataFrame:
1232
1231
  return df[new_order]
1233
1232
 
1234
1233
 
1235
- def append_ranged_classification_column(
1236
- df: pd.DataFrame,
1237
- ranges: str,
1238
- target_col: str,
1239
- new_col_name: str) -> pd.DataFrame:
1234
+ def append_ranged_classification_column(df: pd.DataFrame, ranges: List[Union[int, float]], target_col: str, new_col_name: str) -> pd.DataFrame:
1240
1235
  """
1241
1236
  Append a ranged classification column to the DataFrame.
1242
1237
 
1243
1238
  Parameters:
1244
1239
  df: The DataFrame to modify.
1245
- ranges: A string representation of numeric ranges separated by commas.
1240
+ ranges: A list of numeric range boundaries (integers or floats, last bin extends to infinity).
1246
1241
  target_col: The column to analyze.
1247
1242
  new_col_name: The name of the new classification column.
1248
1243
 
@@ -1259,52 +1254,55 @@ def append_ranged_classification_column(
1259
1254
  else:
1260
1255
  return str(int(number)).zfill(integer_length)
1261
1256
 
1262
- range_list = ranges.split(',')
1263
- has_decimals = any('.' in r for r in range_list)
1257
+ # Check if any numbers in ranges are decimals
1258
+ has_decimals = any(isinstance(r, float) and r % 1 != 0 for r in ranges)
1264
1259
 
1265
1260
  if has_decimals:
1266
- range_list = [float(r) for r in range_list]
1261
+ range_list = [float(r) for r in ranges] + [float('inf')]
1267
1262
 
1268
1263
  max_decimal_length = max(
1269
- len(str(r).split('.')[1])
1270
- for r in range_list
1271
- if '.' in str(r)
1264
+ len(str(r).split('.')[1]) if isinstance(r, float) and r % 1 != 0 else 0
1265
+ for r in ranges
1272
1266
  )
1273
1267
 
1274
1268
  max_integer_length = max(
1275
1269
  len(str(int(float(r))))
1276
- for r in range_list
1270
+ for r in ranges
1277
1271
  )
1278
1272
 
1279
1273
  labels = []
1280
-
1281
- for i in range(len(range_list) - 1):
1274
+ for i in range(len(ranges)):
1282
1275
  start = pad_number(
1283
- range_list[i],
1284
- max_integer_length,
1285
- max_decimal_length,
1286
- decimal=True
1287
- )
1288
-
1289
- end = pad_number(
1290
- range_list[i + 1],
1276
+ ranges[i],
1291
1277
  max_integer_length,
1292
1278
  max_decimal_length,
1293
1279
  decimal=True
1294
1280
  )
1295
-
1296
- label = f"{start} to {end}"
1281
+ if i == len(ranges) - 1:
1282
+ label = f"{start}+"
1283
+ else:
1284
+ end = pad_number(
1285
+ ranges[i + 1],
1286
+ max_integer_length,
1287
+ max_decimal_length,
1288
+ decimal=True
1289
+ )
1290
+ label = f"{start} - {end}"
1297
1291
  labels.append(label)
1298
1292
 
1299
1293
  else:
1300
- range_list = [int(r) for r in range_list]
1294
+ range_list = [int(r) for r in ranges] + [float('inf')]
1295
+ max_integer_length = max(len(str(int(r))) for r in ranges)
1301
1296
 
1302
- max_integer_length = max(
1303
- len(str(r))
1304
- for r in range_list
1305
- )
1306
-
1307
- labels = [f"{pad_number(range_list[i], max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
1297
+ labels = []
1298
+ for i in range(len(ranges)):
1299
+ start = pad_number(ranges[i], max_integer_length)
1300
+ if i == len(ranges) - 1:
1301
+ label = f"{start}+"
1302
+ else:
1303
+ end = pad_number(ranges[i + 1], max_integer_length)
1304
+ label = f"{start} - {end}"
1305
+ labels.append(label)
1308
1306
 
1309
1307
  # Ensure the target column is numeric
1310
1308
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
@@ -1313,22 +1311,19 @@ def append_ranged_classification_column(
1313
1311
  bins=range_list,
1314
1312
  labels=labels,
1315
1313
  right=False,
1316
- include_lowest=True)
1314
+ include_lowest=True
1315
+ )
1317
1316
 
1318
1317
  return df
1319
1318
 
1320
1319
 
1321
- def append_percentile_classification_column(
1322
- df: pd.DataFrame,
1323
- percentiles: str,
1324
- target_col: str,
1325
- new_col_name: str) -> pd.DataFrame:
1320
+ def append_percentile_classification_column(df: pd.DataFrame, percentiles: List[Union[int, float]], target_col: str, new_col_name: str) -> pd.DataFrame:
1326
1321
  """
1327
1322
  Append a percentile classification column to the DataFrame.
1328
1323
 
1329
1324
  Parameters:
1330
1325
  df: The DataFrame to modify.
1331
- percentiles: A string representation of percentile values separated by commas.
1326
+ percentiles: A list of percentile values (0-100, integers or floats).
1332
1327
  target_col: The column to analyze.
1333
1328
  new_col_name: The name of the new classification column.
1334
1329
 
@@ -1345,40 +1340,42 @@ def append_percentile_classification_column(
1345
1340
  else:
1346
1341
  return str(int(number)).zfill(integer_length)
1347
1342
 
1348
- percentiles_list = percentiles.split(',')
1349
- has_decimals = any('.' in p for p in percentiles_list)
1343
+ # Check if any numbers in percentiles are decimals
1344
+ has_decimals = any(isinstance(p, float) and p % 1 != 0 for p in percentiles)
1350
1345
 
1351
1346
  if has_decimals:
1352
- percentiles_list = [float(p) for p in percentiles_list]
1353
- max_decimal_length = max(len(str(p).split('.')[1]) for p in percentiles_list if '.' in str(p))
1354
- max_integer_length = max(len(str(int(float(p)))) for p in percentiles_list)
1347
+ percentiles_list = [float(p) for p in percentiles]
1348
+ max_decimal_length = max(
1349
+ len(str(p).split('.')[1]) if isinstance(p, float) and p % 1 != 0 else 0
1350
+ for p in percentiles
1351
+ )
1352
+ max_integer_length = max(len(str(int(float(p)))) for p in percentiles)
1355
1353
 
1356
1354
  labels = []
1357
-
1358
1355
  for i in range(len(percentiles_list) - 1):
1359
1356
  start = pad_number(
1360
1357
  percentiles_list[i],
1361
1358
  max_integer_length,
1362
1359
  max_decimal_length,
1363
- decimal=True)
1360
+ decimal=True
1361
+ )
1364
1362
  end = pad_number(
1365
- percentiles_list[i + 1], max_integer_length, max_decimal_length, decimal=True)
1366
-
1367
- label = f"{start} to {end}"
1363
+ percentiles_list[i + 1],
1364
+ max_integer_length,
1365
+ max_decimal_length,
1366
+ decimal=True
1367
+ )
1368
+ label = f"{start} - {end}"
1368
1369
  labels.append(label)
1369
1370
  else:
1370
- percentiles_list = [int(p) for p in percentiles_list]
1371
-
1371
+ percentiles_list = [int(p) for p in percentiles]
1372
1372
  max_integer_length = max(len(str(p)) for p in percentiles_list)
1373
1373
 
1374
1374
  labels = []
1375
-
1376
1375
  for i in range(len(percentiles_list) - 1):
1377
1376
  start = pad_number(percentiles_list[i], max_integer_length)
1378
-
1379
1377
  end = pad_number(percentiles_list[i + 1], max_integer_length)
1380
-
1381
- label = f"{start} to {end}"
1378
+ label = f"{start} - {end}"
1382
1379
  labels.append(label)
1383
1380
 
1384
1381
  # Ensure the target column is numeric
@@ -1389,22 +1386,19 @@ def append_percentile_classification_column(
1389
1386
  df[target_col],
1390
1387
  bins=quantiles,
1391
1388
  labels=labels,
1392
- include_lowest=True)
1389
+ include_lowest=True
1390
+ )
1393
1391
 
1394
1392
  return df
1395
1393
 
1396
1394
 
1397
- def append_ranged_date_classification_column(
1398
- df: pd.DataFrame,
1399
- date_ranges: str,
1400
- target_col: str,
1401
- new_col_name: str) -> pd.DataFrame:
1395
+ def append_ranged_date_classification_column(df: pd.DataFrame, date_ranges: list[str], target_col: str, new_col_name: str) -> pd.DataFrame:
1402
1396
  """
1403
1397
  Append a ranged date classification column to the DataFrame.
1404
1398
 
1405
1399
  Parameters:
1406
1400
  df: The DataFrame to modify.
1407
- date_ranges: A string representation of date ranges separated by commas.
1401
+ date_ranges: A list of date strings in a format pandas can parse (e.g., ['2020-01-01', '2020-06-30', '2020-12-31']).
1408
1402
  target_col: The date column to analyze.
1409
1403
  new_col_name: The name of the new date classification column.
1410
1404
 
@@ -1412,28 +1406,26 @@ def append_ranged_date_classification_column(
1412
1406
  A new DataFrame with the date classification column appended.
1413
1407
  """
1414
1408
 
1415
- date_list = [pd.to_datetime(date) for date in date_ranges.split(',')]
1409
+ date_list = [pd.to_datetime(date) for date in date_ranges]
1416
1410
 
1417
1411
  labels = []
1418
-
1419
1412
  for i in range(len(date_list) - 1):
1420
1413
  start_date = date_list[i].strftime('%Y-%m-%d')
1421
1414
  end_date = date_list[i + 1].strftime('%Y-%m-%d')
1422
- label = f"{start_date} to {end_date}"
1415
+ label = f"{start_date} - {end_date}"
1423
1416
  labels.append(label)
1424
1417
 
1425
1418
  df[new_col_name] = pd.cut(
1426
1419
  pd.to_datetime(df[target_col]),
1427
1420
  bins=date_list,
1428
1421
  labels=labels,
1429
- right=False)
1422
+ right=False
1423
+ )
1430
1424
 
1431
1425
  return df
1432
1426
 
1433
1427
 
1434
- def rename_columns(df: pd.DataFrame,
1435
- rename_pairs: Dict[str,
1436
- str]) -> pd.DataFrame:
1428
+ def rename_columns(df: pd.DataFrame, rename_pairs: Dict[str, str]) -> pd.DataFrame:
1437
1429
  """
1438
1430
  Rename columns in the DataFrame.
1439
1431
 
rgwfuncs/str_lib.py CHANGED
@@ -45,8 +45,6 @@ def send_telegram_message(preset_name: str, message: str, config: Optional[Union
45
45
  else:
46
46
  raise ValueError("Config must be either a path string or a dictionary")
47
47
 
48
-
49
-
50
48
  def get_telegram_preset(config: dict, preset_name: str) -> dict:
51
49
  """Get the Telegram preset configuration."""
52
50
  presets = config.get("telegram_bot_presets", [])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rgwfuncs
3
- Version: 0.0.91
3
+ Version: 0.0.93
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1249,7 +1249,7 @@ Append a ranged classification column to the DataFrame.
1249
1249
 
1250
1250
  • Parameters:
1251
1251
  - df (pd.DataFrame)
1252
- - ranges (str): Ranges separated by commas (e.g., "0-10,11-20,21-30").
1252
+ - ranges (list[int | float]): List of numeric range boundaries (e.g., [0, 10, 20, 30]), last bin extends to infinity.
1253
1253
  - `target_col` (str): The column to classify.
1254
1254
  - `new_col_name` (str): Name of the new classification column.
1255
1255
 
@@ -1257,14 +1257,14 @@ Append a ranged classification column to the DataFrame.
1257
1257
  - pd.DataFrame
1258
1258
 
1259
1259
  • Example:
1260
-
1260
+
1261
1261
  from rgwfuncs import append_ranged_classification_column
1262
1262
  import pandas as pd
1263
1263
 
1264
1264
  df = pd.DataFrame({'Scores': [5, 12, 25]})
1265
- df_classified = append_ranged_classification_column(df, '0-10,11-20,21-30', 'Scores', 'ScoreRange')
1265
+ df_classified = append_ranged_classification_column(df, [0, 10, 20, 30], 'Scores', 'ScoreRange')
1266
1266
  print(df_classified)
1267
-
1267
+
1268
1268
 
1269
1269
  --------------------------------------------------------------------------------
1270
1270
 
@@ -1273,7 +1273,7 @@ Append a percentile classification column to the DataFrame.
1273
1273
 
1274
1274
  • Parameters:
1275
1275
  - df (pd.DataFrame)
1276
- - percentiles (str): Percentile values separated by commas (e.g., "25,50,75").
1276
+ - percentiles (list[int | float]): List of percentile values (0-100, e.g., [25, 50, 75]).
1277
1277
  - `target_col` (str)
1278
1278
  - `new_col_name` (str)
1279
1279
 
@@ -1281,14 +1281,14 @@ Append a percentile classification column to the DataFrame.
1281
1281
  - pd.DataFrame
1282
1282
 
1283
1283
  • Example:
1284
-
1284
+
1285
1285
  from rgwfuncs import append_percentile_classification_column
1286
1286
  import pandas as pd
1287
1287
 
1288
1288
  df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
1289
- df_classified = append_percentile_classification_column(df, '25,50,75', 'Values', 'ValuePercentile')
1289
+ df_classified = append_percentile_classification_column(df, [25, 50, 75], 'Values', 'ValuePercentile')
1290
1290
  print(df_classified)
1291
-
1291
+
1292
1292
 
1293
1293
  --------------------------------------------------------------------------------
1294
1294
 
@@ -1297,7 +1297,7 @@ Append a ranged date classification column to the DataFrame.
1297
1297
 
1298
1298
  • Parameters:
1299
1299
  - df (pd.DataFrame)
1300
- - `date_ranges` (str): Date ranges separated by commas, e.g., `2020-01-01_2020-06-30,2020-07-01_2020-12-31`
1300
+ - `date_ranges` (list[str]): List of date strings in a format pandas can parse (e.g., ['2020-01-01', '2020-06-30', '2020-12-31']).
1301
1301
  - `target_col` (str)
1302
1302
  - `new_col_name` (str)
1303
1303
 
@@ -1305,19 +1305,18 @@ Append a ranged date classification column to the DataFrame.
1305
1305
  - pd.DataFrame
1306
1306
 
1307
1307
  • Example:
1308
-
1308
+
1309
1309
  from rgwfuncs import append_ranged_date_classification_column
1310
1310
  import pandas as pd
1311
1311
 
1312
- df = pd.DataFrame({'EventDate': pd.to_datetime(['2020-03-15','2020-08-10'])})
1312
+ df = pd.DataFrame({'EventDate': pd.to_datetime(['2020-03-15', '2020-08-10'])})
1313
1313
  df_classified = append_ranged_date_classification_column(
1314
1314
  df,
1315
- '2020-01-01_2020-06-30,2020-07-01_2020-12-31',
1315
+ ['2020-01-01', '2020-06-30', '2020-12-31'],
1316
1316
  'EventDate',
1317
1317
  'DateRange'
1318
1318
  )
1319
1319
  print(df_classified)
1320
-
1321
1320
 
1322
1321
  --------------------------------------------------------------------------------
1323
1322
 
@@ -0,0 +1,12 @@
1
+ rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
2
+ rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
+ rgwfuncs/df_lib.py,sha256=LHG6E-umLGVdDRWjziFrRb_YSlTronHv2QwEFBrTAt4,75528
4
+ rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
5
+ rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
6
+ rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
7
+ rgwfuncs-0.0.93.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.93.dist-info/METADATA,sha256=eEiNMD4k_feeK6mxhrVqlvDTRONwsPosvR-pj9hYlAI,61443
9
+ rgwfuncs-0.0.93.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
+ rgwfuncs-0.0.93.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.93.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.93.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.0.2)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
2
- rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=coAmZ2RyWvovorNekQnLW5cUIZyY-h6s88YUm0ytFAw,75107
4
- rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
5
- rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
6
- rgwfuncs/str_lib.py,sha256=-dcJt-jE0YG-XwHcNslCM_Gp-L0Ho0zYFPxjrxepzzA,3210
7
- rgwfuncs-0.0.91.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.91.dist-info/METADATA,sha256=BVQ4dPy_oI5jTtqwEBfj96OGqwIlOy8Ib57Je7Q6Tuk,61390
9
- rgwfuncs-0.0.91.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
10
- rgwfuncs-0.0.91.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.91.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.91.dist-info/RECORD,,