rgwfuncs 0.0.90__py3-none-any.whl → 0.0.92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +167 -98
- rgwfuncs/interactive_shell_lib.py +19 -19
- rgwfuncs/str_lib.py +27 -12
- {rgwfuncs-0.0.90.dist-info → rgwfuncs-0.0.92.dist-info}/METADATA +35 -15
- rgwfuncs-0.0.92.dist-info/RECORD +12 -0
- {rgwfuncs-0.0.90.dist-info → rgwfuncs-0.0.92.dist-info}/WHEEL +1 -1
- rgwfuncs-0.0.90.dist-info/RECORD +0 -12
- {rgwfuncs-0.0.90.dist-info → rgwfuncs-0.0.92.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.90.dist-info → rgwfuncs-0.0.92.dist-info/licenses}/LICENSE +0 -0
- {rgwfuncs-0.0.90.dist-info → rgwfuncs-0.0.92.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -22,8 +22,7 @@ from email import encoders
|
|
22
22
|
from googleapiclient.discovery import build
|
23
23
|
import base64
|
24
24
|
import boto3
|
25
|
-
|
26
|
-
from typing import Optional, Dict, List, Tuple, Any, Callable
|
25
|
+
from typing import Optional, Dict, List, Tuple, Any, Callable, Union
|
27
26
|
import warnings
|
28
27
|
|
29
28
|
# Suppress all FutureWarnings
|
@@ -311,13 +310,17 @@ def drop_duplicates_retain_last(
|
|
311
310
|
return df.drop_duplicates(subset=columns_list, keep='last')
|
312
311
|
|
313
312
|
|
314
|
-
def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
|
313
|
+
def load_data_from_query(db_preset_name: str, query: str, config: Optional[Union[str, dict]] = None) -> pd.DataFrame:
|
315
314
|
"""
|
316
315
|
Load data from a database query into a DataFrame based on a configuration preset.
|
317
316
|
|
318
317
|
Parameters:
|
319
318
|
db_preset_name: The name of the database preset in the configuration file.
|
320
319
|
query: The SQL query to execute.
|
320
|
+
config (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
321
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
322
|
+
- str: Path to a JSON configuration file
|
323
|
+
- dict: Direct configuration dictionary
|
321
324
|
|
322
325
|
Returns:
|
323
326
|
A DataFrame containing the query result.
|
@@ -327,6 +330,27 @@ def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
|
|
327
330
|
ValueError: If the database preset or db_type is invalid.
|
328
331
|
"""
|
329
332
|
|
333
|
+
def get_config(config: Optional[Union[str, dict]] = None) -> dict:
|
334
|
+
"""Get the rgwfuncs configuration either from a path or direct dictionary."""
|
335
|
+
def get_config_from_file(config_path: str) -> dict:
|
336
|
+
"""Load configuration from a JSON file."""
|
337
|
+
with open(config_path, 'r') as file:
|
338
|
+
return json.load(file)
|
339
|
+
|
340
|
+
# Determine the config to use
|
341
|
+
if config is None:
|
342
|
+
# Default to ~/.rgwfuncsrc if no config provided
|
343
|
+
config_path = os.path.expanduser('~/.rgwfuncsrc')
|
344
|
+
return get_config_from_file(config_path)
|
345
|
+
elif isinstance(config, str):
|
346
|
+
# If config is a string, treat it as a path and load it
|
347
|
+
return get_config_from_file(config)
|
348
|
+
elif isinstance(config, dict):
|
349
|
+
# If config is already a dict, use it directly
|
350
|
+
return config
|
351
|
+
else:
|
352
|
+
raise ValueError("Config must be either a path string or a dictionary")
|
353
|
+
|
330
354
|
def query_mssql(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
|
331
355
|
server = db_preset['host']
|
332
356
|
user = db_preset['username']
|
@@ -446,11 +470,7 @@ def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
|
|
446
470
|
wait_for_athena_query_to_complete(athena_client, query_execution_id)
|
447
471
|
return download_athena_query_results(athena_client, query_execution_id)
|
448
472
|
|
449
|
-
|
450
|
-
config_path = os.path.expanduser('~/.rgwfuncsrc')
|
451
|
-
with open(config_path, 'r') as f:
|
452
|
-
config = json.load(f)
|
453
|
-
|
473
|
+
config = get_config(config)
|
454
474
|
db_presets = config.get('db_presets', [])
|
455
475
|
db_preset = next(
|
456
476
|
(preset for preset in db_presets if preset['name'] == db_preset_name),
|
@@ -846,7 +866,8 @@ def send_dataframe_via_telegram(
|
|
846
866
|
bot_name: str,
|
847
867
|
message: Optional[str] = None,
|
848
868
|
as_file: bool = True,
|
849
|
-
remove_after_send: bool = True
|
869
|
+
remove_after_send: bool = True,
|
870
|
+
config: Optional[Union[str, dict]] = None) -> None:
|
850
871
|
"""
|
851
872
|
Send a DataFrame via Telegram using a specified bot configuration.
|
852
873
|
|
@@ -856,6 +877,10 @@ def send_dataframe_via_telegram(
|
|
856
877
|
message: Custom message to send along with the DataFrame or file. Defaults to None.
|
857
878
|
as_file: Boolean flag to indicate whether the DataFrame should be sent as a file (True) or as text (False). Defaults to True.
|
858
879
|
remove_after_send: If True, removes the CSV file after sending. Defaults to True.
|
880
|
+
config (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
881
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
882
|
+
- str: Path to a JSON configuration file
|
883
|
+
- dict: Direct configuration dictionary
|
859
884
|
|
860
885
|
Raises:
|
861
886
|
ValueError: If the specified bot is not found or if no DataFrame is provided.
|
@@ -865,14 +890,28 @@ def send_dataframe_via_telegram(
|
|
865
890
|
By default, the configuration file is read from `~/.rgwfuncsrc`; pass `config` to use a different JSON file path or an in-memory dictionary.
|
866
891
|
"""
|
867
892
|
|
868
|
-
def get_config(
|
869
|
-
"""
|
870
|
-
|
871
|
-
|
893
|
+
def get_config(config: Optional[Union[str, dict]] = None) -> dict:
|
894
|
+
"""Get telegram configuration either from a path or direct dictionary."""
|
895
|
+
def get_config_from_file(config_path: str) -> dict:
|
896
|
+
"""Load configuration from a JSON file."""
|
897
|
+
with open(config_path, 'r') as file:
|
898
|
+
return json.load(file)
|
872
899
|
|
873
|
-
|
874
|
-
|
875
|
-
|
900
|
+
# Determine the config to use
|
901
|
+
if config is None:
|
902
|
+
# Default to ~/.rgwfuncsrc if no config provided
|
903
|
+
config_path = os.path.expanduser('~/.rgwfuncsrc')
|
904
|
+
return get_config_from_file(config_path)
|
905
|
+
elif isinstance(config, str):
|
906
|
+
# If config is a string, treat it as a path and load it
|
907
|
+
return get_config_from_file(config)
|
908
|
+
elif isinstance(config, dict):
|
909
|
+
# If config is already a dict, use it directly
|
910
|
+
return config
|
911
|
+
else:
|
912
|
+
raise ValueError("Config must be either a path string or a dictionary")
|
913
|
+
|
914
|
+
config = get_config(config)
|
876
915
|
|
877
916
|
bot_config = next(
|
878
917
|
(bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name),
|
@@ -926,7 +965,8 @@ def send_data_to_email(
|
|
926
965
|
subject: Optional[str] = None,
|
927
966
|
body: Optional[str] = None,
|
928
967
|
as_file: bool = True,
|
929
|
-
remove_after_send: bool = True
|
968
|
+
remove_after_send: bool = True,
|
969
|
+
config: Optional[Union[str, dict]] = None) -> None:
|
930
970
|
"""
|
931
971
|
Send an email with an optional DataFrame attachment using the Gmail API via a specified preset.
|
932
972
|
|
@@ -938,6 +978,10 @@ def send_data_to_email(
|
|
938
978
|
body: Optional message body of the email. Defaults to 'Please find the CSV file attached.' if not given.
|
939
979
|
as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or embed it in the email (False). Defaults to True.
|
940
980
|
remove_after_send: If True, removes the CSV file after sending. Defaults to True.
|
981
|
+
config (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
982
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
983
|
+
- str: Path to a JSON configuration file
|
984
|
+
- dict: Direct configuration dictionary
|
941
985
|
|
942
986
|
Raises:
|
943
987
|
ValueError: If the preset is not found in the configuration.
|
@@ -947,12 +991,26 @@ def send_data_to_email(
|
|
947
991
|
By default, the configuration file is read from `~/.rgwfuncsrc`; pass `config` to use a different JSON file path or an in-memory dictionary.
|
948
992
|
"""
|
949
993
|
|
950
|
-
def get_config(
|
951
|
-
|
952
|
-
|
994
|
+
def get_config(config: Optional[Union[str, dict]] = None) -> dict:
|
995
|
+
"""Get the rgwfuncs configuration either from a path or direct dictionary."""
|
996
|
+
def get_config_from_file(config_path: str) -> dict:
|
997
|
+
"""Load configuration from a JSON file."""
|
998
|
+
with open(config_path, 'r') as file:
|
953
999
|
return json.load(file)
|
954
|
-
|
955
|
-
|
1000
|
+
|
1001
|
+
# Determine the config to use
|
1002
|
+
if config is None:
|
1003
|
+
# Default to ~/.rgwfuncsrc if no config provided
|
1004
|
+
config_path = os.path.expanduser('~/.rgwfuncsrc')
|
1005
|
+
return get_config_from_file(config_path)
|
1006
|
+
elif isinstance(config, str):
|
1007
|
+
# If config is a string, treat it as a path and load it
|
1008
|
+
return get_config_from_file(config)
|
1009
|
+
elif isinstance(config, dict):
|
1010
|
+
# If config is already a dict, use it directly
|
1011
|
+
return config
|
1012
|
+
else:
|
1013
|
+
raise ValueError("Config must be either a path string or a dictionary")
|
956
1014
|
|
957
1015
|
def authenticate_service_account(
|
958
1016
|
service_account_credentials_path: str,
|
@@ -964,9 +1022,7 @@ def send_data_to_email(
|
|
964
1022
|
)
|
965
1023
|
return build('gmail', 'v1', credentials=credentials)
|
966
1024
|
|
967
|
-
|
968
|
-
config_path = os.path.expanduser('~/.rgwfuncsrc')
|
969
|
-
config = get_config(config_path)
|
1025
|
+
config = get_config(config)
|
970
1026
|
|
971
1027
|
# Retrieve Gmail preset configuration
|
972
1028
|
gmail_config = next(
|
@@ -1038,7 +1094,8 @@ def send_data_to_slack(
|
|
1038
1094
|
bot_name: str,
|
1039
1095
|
message: Optional[str] = None,
|
1040
1096
|
as_file: bool = True,
|
1041
|
-
remove_after_send: bool = True
|
1097
|
+
remove_after_send: bool = True,
|
1098
|
+
config: Optional[Union[str, dict]] = None) -> None:
|
1042
1099
|
"""
|
1043
1100
|
Send a DataFrame or message to Slack using a specified bot configuration.
|
1044
1101
|
|
@@ -1048,6 +1105,10 @@ def send_data_to_slack(
|
|
1048
1105
|
message: Custom message to send along with the DataFrame or file. Defaults to None.
|
1049
1106
|
as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or as text (False). Defaults to True.
|
1050
1107
|
remove_after_send: If True, removes the CSV file after sending. Defaults to True.
|
1108
|
+
config (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
1109
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
1110
|
+
- str: Path to a JSON configuration file
|
1111
|
+
- dict: Direct configuration dictionary
|
1051
1112
|
|
1052
1113
|
Raises:
|
1053
1114
|
ValueError: If the specified bot is not found in the configuration.
|
@@ -1057,14 +1118,29 @@ def send_data_to_slack(
|
|
1057
1118
|
By default, the configuration file is read from `~/.rgwfuncsrc`; pass `config` to use a different JSON file path or an in-memory dictionary.
|
1058
1119
|
"""
|
1059
1120
|
|
1060
|
-
def get_config(
|
1061
|
-
"""
|
1062
|
-
|
1063
|
-
|
1121
|
+
def get_config(config: Optional[Union[str, dict]] = None) -> dict:
|
1122
|
+
"""Get the rgwfuncs configuration either from a path or direct dictionary."""
|
1123
|
+
def get_config_from_file(config_path: str) -> dict:
|
1124
|
+
"""Load configuration from a JSON file."""
|
1125
|
+
with open(config_path, 'r') as file:
|
1126
|
+
return json.load(file)
|
1127
|
+
|
1128
|
+
# Determine the config to use
|
1129
|
+
if config is None:
|
1130
|
+
# Default to ~/.rgwfuncsrc if no config provided
|
1131
|
+
config_path = os.path.expanduser('~/.rgwfuncsrc')
|
1132
|
+
return get_config_from_file(config_path)
|
1133
|
+
elif isinstance(config, str):
|
1134
|
+
# If config is a string, treat it as a path and load it
|
1135
|
+
return get_config_from_file(config)
|
1136
|
+
elif isinstance(config, dict):
|
1137
|
+
# If config is already a dict, use it directly
|
1138
|
+
return config
|
1139
|
+
else:
|
1140
|
+
raise ValueError("Config must be either a path string or a dictionary")
|
1064
1141
|
|
1065
1142
|
# Load the Slack configuration (from ~/.rgwfuncsrc unless `config` is given)
|
1066
|
-
|
1067
|
-
config = get_config(config_path)
|
1143
|
+
config = get_config(config)
|
1068
1144
|
|
1069
1145
|
bot_config = next(
|
1070
1146
|
(bot for bot in config['slack_bot_presets'] if bot['name'] == bot_name),
|
@@ -1155,17 +1231,13 @@ def order_columns(df: pd.DataFrame, column_order_str: str) -> pd.DataFrame:
|
|
1155
1231
|
return df[new_order]
|
1156
1232
|
|
1157
1233
|
|
1158
|
-
def append_ranged_classification_column(
|
1159
|
-
df: pd.DataFrame,
|
1160
|
-
ranges: str,
|
1161
|
-
target_col: str,
|
1162
|
-
new_col_name: str) -> pd.DataFrame:
|
1234
|
+
def append_ranged_classification_column(df: pd.DataFrame, ranges: List[Union[int, float]], target_col: str, new_col_name: str) -> pd.DataFrame:
|
1163
1235
|
"""
|
1164
1236
|
Append a ranged classification column to the DataFrame.
|
1165
1237
|
|
1166
1238
|
Parameters:
|
1167
1239
|
df: The DataFrame to modify.
|
1168
|
-
ranges: A
|
1240
|
+
ranges: A list of numeric range boundaries (integers or floats, last bin extends to infinity).
|
1169
1241
|
target_col: The column to analyze.
|
1170
1242
|
new_col_name: The name of the new classification column.
|
1171
1243
|
|
@@ -1182,52 +1254,55 @@ def append_ranged_classification_column(
|
|
1182
1254
|
else:
|
1183
1255
|
return str(int(number)).zfill(integer_length)
|
1184
1256
|
|
1185
|
-
|
1186
|
-
has_decimals = any(
|
1257
|
+
# Check if any numbers in ranges are decimals
|
1258
|
+
has_decimals = any(isinstance(r, float) and r % 1 != 0 for r in ranges)
|
1187
1259
|
|
1188
1260
|
if has_decimals:
|
1189
|
-
range_list = [float(r) for r in
|
1261
|
+
range_list = [float(r) for r in ranges] + [float('inf')]
|
1190
1262
|
|
1191
1263
|
max_decimal_length = max(
|
1192
|
-
len(str(r).split('.')[1])
|
1193
|
-
for r in
|
1194
|
-
if '.' in str(r)
|
1264
|
+
len(str(r).split('.')[1]) if isinstance(r, float) and r % 1 != 0 else 0
|
1265
|
+
for r in ranges
|
1195
1266
|
)
|
1196
1267
|
|
1197
1268
|
max_integer_length = max(
|
1198
1269
|
len(str(int(float(r))))
|
1199
|
-
for r in
|
1270
|
+
for r in ranges
|
1200
1271
|
)
|
1201
1272
|
|
1202
1273
|
labels = []
|
1203
|
-
|
1204
|
-
for i in range(len(range_list) - 1):
|
1274
|
+
for i in range(len(ranges)):
|
1205
1275
|
start = pad_number(
|
1206
|
-
|
1276
|
+
ranges[i],
|
1207
1277
|
max_integer_length,
|
1208
1278
|
max_decimal_length,
|
1209
1279
|
decimal=True
|
1210
1280
|
)
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1281
|
+
if i == len(ranges) - 1:
|
1282
|
+
label = f"{start} to infinity"
|
1283
|
+
else:
|
1284
|
+
end = pad_number(
|
1285
|
+
ranges[i + 1],
|
1286
|
+
max_integer_length,
|
1287
|
+
max_decimal_length,
|
1288
|
+
decimal=True
|
1289
|
+
)
|
1290
|
+
label = f"{start} to {end}"
|
1220
1291
|
labels.append(label)
|
1221
1292
|
|
1222
1293
|
else:
|
1223
|
-
range_list = [int(r) for r in
|
1224
|
-
|
1225
|
-
max_integer_length = max(
|
1226
|
-
len(str(r))
|
1227
|
-
for r in range_list
|
1228
|
-
)
|
1294
|
+
range_list = [int(r) for r in ranges] + [float('inf')]
|
1295
|
+
max_integer_length = max(len(str(int(r))) for r in ranges)
|
1229
1296
|
|
1230
|
-
labels = [
|
1297
|
+
labels = []
|
1298
|
+
for i in range(len(ranges)):
|
1299
|
+
start = pad_number(ranges[i], max_integer_length)
|
1300
|
+
if i == len(ranges) - 1:
|
1301
|
+
label = f"{start} to infinity"
|
1302
|
+
else:
|
1303
|
+
end = pad_number(ranges[i + 1], max_integer_length)
|
1304
|
+
label = f"{start} to {end}"
|
1305
|
+
labels.append(label)
|
1231
1306
|
|
1232
1307
|
# Ensure the target column is numeric
|
1233
1308
|
df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
|
@@ -1236,22 +1311,19 @@ def append_ranged_classification_column(
|
|
1236
1311
|
bins=range_list,
|
1237
1312
|
labels=labels,
|
1238
1313
|
right=False,
|
1239
|
-
include_lowest=True
|
1314
|
+
include_lowest=True
|
1315
|
+
)
|
1240
1316
|
|
1241
1317
|
return df
|
1242
1318
|
|
1243
1319
|
|
1244
|
-
def append_percentile_classification_column(
|
1245
|
-
df: pd.DataFrame,
|
1246
|
-
percentiles: str,
|
1247
|
-
target_col: str,
|
1248
|
-
new_col_name: str) -> pd.DataFrame:
|
1320
|
+
def append_percentile_classification_column(df: pd.DataFrame, percentiles: List[Union[int, float]], target_col: str, new_col_name: str) -> pd.DataFrame:
|
1249
1321
|
"""
|
1250
1322
|
Append a percentile classification column to the DataFrame.
|
1251
1323
|
|
1252
1324
|
Parameters:
|
1253
1325
|
df: The DataFrame to modify.
|
1254
|
-
percentiles: A
|
1326
|
+
percentiles: A list of percentile values (0-100, integers or floats).
|
1255
1327
|
target_col: The column to analyze.
|
1256
1328
|
new_col_name: The name of the new classification column.
|
1257
1329
|
|
@@ -1268,39 +1340,41 @@ def append_percentile_classification_column(
|
|
1268
1340
|
else:
|
1269
1341
|
return str(int(number)).zfill(integer_length)
|
1270
1342
|
|
1271
|
-
|
1272
|
-
has_decimals = any(
|
1343
|
+
# Check if any numbers in percentiles are decimals
|
1344
|
+
has_decimals = any(isinstance(p, float) and p % 1 != 0 for p in percentiles)
|
1273
1345
|
|
1274
1346
|
if has_decimals:
|
1275
|
-
percentiles_list = [float(p) for p in
|
1276
|
-
max_decimal_length = max(
|
1277
|
-
|
1347
|
+
percentiles_list = [float(p) for p in percentiles]
|
1348
|
+
max_decimal_length = max(
|
1349
|
+
len(str(p).split('.')[1]) if isinstance(p, float) and p % 1 != 0 else 0
|
1350
|
+
for p in percentiles
|
1351
|
+
)
|
1352
|
+
max_integer_length = max(len(str(int(float(p)))) for p in percentiles)
|
1278
1353
|
|
1279
1354
|
labels = []
|
1280
|
-
|
1281
1355
|
for i in range(len(percentiles_list) - 1):
|
1282
1356
|
start = pad_number(
|
1283
1357
|
percentiles_list[i],
|
1284
1358
|
max_integer_length,
|
1285
1359
|
max_decimal_length,
|
1286
|
-
decimal=True
|
1360
|
+
decimal=True
|
1361
|
+
)
|
1287
1362
|
end = pad_number(
|
1288
|
-
percentiles_list[i + 1],
|
1289
|
-
|
1363
|
+
percentiles_list[i + 1],
|
1364
|
+
max_integer_length,
|
1365
|
+
max_decimal_length,
|
1366
|
+
decimal=True
|
1367
|
+
)
|
1290
1368
|
label = f"{start} to {end}"
|
1291
1369
|
labels.append(label)
|
1292
1370
|
else:
|
1293
|
-
percentiles_list = [int(p) for p in
|
1294
|
-
|
1371
|
+
percentiles_list = [int(p) for p in percentiles]
|
1295
1372
|
max_integer_length = max(len(str(p)) for p in percentiles_list)
|
1296
1373
|
|
1297
1374
|
labels = []
|
1298
|
-
|
1299
1375
|
for i in range(len(percentiles_list) - 1):
|
1300
1376
|
start = pad_number(percentiles_list[i], max_integer_length)
|
1301
|
-
|
1302
1377
|
end = pad_number(percentiles_list[i + 1], max_integer_length)
|
1303
|
-
|
1304
1378
|
label = f"{start} to {end}"
|
1305
1379
|
labels.append(label)
|
1306
1380
|
|
@@ -1312,22 +1386,19 @@ def append_percentile_classification_column(
|
|
1312
1386
|
df[target_col],
|
1313
1387
|
bins=quantiles,
|
1314
1388
|
labels=labels,
|
1315
|
-
include_lowest=True
|
1389
|
+
include_lowest=True
|
1390
|
+
)
|
1316
1391
|
|
1317
1392
|
return df
|
1318
1393
|
|
1319
1394
|
|
1320
|
-
def append_ranged_date_classification_column(
|
1321
|
-
df: pd.DataFrame,
|
1322
|
-
date_ranges: str,
|
1323
|
-
target_col: str,
|
1324
|
-
new_col_name: str) -> pd.DataFrame:
|
1395
|
+
def append_ranged_date_classification_column(df: pd.DataFrame, date_ranges: list[str], target_col: str, new_col_name: str) -> pd.DataFrame:
|
1325
1396
|
"""
|
1326
1397
|
Append a ranged date classification column to the DataFrame.
|
1327
1398
|
|
1328
1399
|
Parameters:
|
1329
1400
|
df: The DataFrame to modify.
|
1330
|
-
date_ranges: A
|
1401
|
+
date_ranges: A list of date strings in a format pandas can parse (e.g., ['2020-01-01', '2020-06-30', '2020-12-31']).
|
1331
1402
|
target_col: The date column to analyze.
|
1332
1403
|
new_col_name: The name of the new date classification column.
|
1333
1404
|
|
@@ -1335,10 +1406,9 @@ def append_ranged_date_classification_column(
|
|
1335
1406
|
A new DataFrame with the date classification column appended.
|
1336
1407
|
"""
|
1337
1408
|
|
1338
|
-
date_list = [pd.to_datetime(date) for date in date_ranges
|
1409
|
+
date_list = [pd.to_datetime(date) for date in date_ranges]
|
1339
1410
|
|
1340
1411
|
labels = []
|
1341
|
-
|
1342
1412
|
for i in range(len(date_list) - 1):
|
1343
1413
|
start_date = date_list[i].strftime('%Y-%m-%d')
|
1344
1414
|
end_date = date_list[i + 1].strftime('%Y-%m-%d')
|
@@ -1349,14 +1419,13 @@ def append_ranged_date_classification_column(
|
|
1349
1419
|
pd.to_datetime(df[target_col]),
|
1350
1420
|
bins=date_list,
|
1351
1421
|
labels=labels,
|
1352
|
-
right=False
|
1422
|
+
right=False
|
1423
|
+
)
|
1353
1424
|
|
1354
1425
|
return df
|
1355
1426
|
|
1356
1427
|
|
1357
|
-
def rename_columns(df: pd.DataFrame,
|
1358
|
-
rename_pairs: Dict[str,
|
1359
|
-
str]) -> pd.DataFrame:
|
1428
|
+
def rename_columns(df: pd.DataFrame, rename_pairs: Dict[str, str]) -> pd.DataFrame:
|
1360
1429
|
"""
|
1361
1430
|
Rename columns in the DataFrame.
|
1362
1431
|
|
@@ -11,20 +11,21 @@ from .algebra_lib import * # noqa: F401, F403, E402
|
|
11
11
|
from .str_lib import * # noqa: F401, F403, E402
|
12
12
|
from .docs_lib import * # noqa: F401, F403, E402
|
13
13
|
|
14
|
+
|
14
15
|
def interactive_shell(local_vars: Dict[str, Any]) -> None:
|
15
16
|
"""
|
16
17
|
Launch an interactive prompt for inspecting and modifying local variables,
|
17
18
|
with blue-colored output and a white prompt. An extra blank line appears before
|
18
19
|
each new prompt, and the welcome banner appears in blue. No extra exit message is printed.
|
19
|
-
|
20
|
+
|
20
21
|
local_vars: dictionary of variables available in the interactive shell.
|
21
22
|
"""
|
22
|
-
|
23
|
+
|
23
24
|
# ANSI color escape codes.
|
24
25
|
BLUE = "\033[94m"
|
25
26
|
WHITE = "\033[37m"
|
26
27
|
RESET = "\033[0m"
|
27
|
-
|
28
|
+
|
28
29
|
# Set up readline history.
|
29
30
|
def setup_readline() -> None:
|
30
31
|
HISTORY_FILE = os.path.expanduser("~/.rgwfuncs_shell_history")
|
@@ -36,13 +37,13 @@ def interactive_shell(local_vars: Dict[str, Any]) -> None:
|
|
36
37
|
except Exception as e:
|
37
38
|
print(f"Warning: Could not load history file: {e}")
|
38
39
|
atexit.register(readline.write_history_file, HISTORY_FILE)
|
39
|
-
|
40
|
+
|
40
41
|
# BlueStdout: a wrapper for sys.stdout to ensure output is in blue.
|
41
42
|
class BlueStdout:
|
42
43
|
def __init__(self, wrapped):
|
43
44
|
self.wrapped = wrapped
|
44
45
|
self.at_line_start = True
|
45
|
-
|
46
|
+
|
46
47
|
def write(self, s):
|
47
48
|
# If the output exactly matches our prompt, leave it uncolored.
|
48
49
|
if s == sys.ps1 or s == sys.ps2:
|
@@ -60,19 +61,19 @@ def interactive_shell(local_vars: Dict[str, Any]) -> None:
|
|
60
61
|
self.at_line_start = True
|
61
62
|
else:
|
62
63
|
self.at_line_start = (line == "")
|
63
|
-
|
64
|
+
|
64
65
|
def flush(self):
|
65
66
|
self.wrapped.flush()
|
66
|
-
|
67
|
+
|
67
68
|
def isatty(self):
|
68
69
|
return self.wrapped.isatty()
|
69
|
-
|
70
|
+
|
70
71
|
def fileno(self):
|
71
72
|
return self.wrapped.fileno()
|
72
|
-
|
73
|
+
|
73
74
|
def __getattr__(self, attr):
|
74
75
|
return getattr(self.wrapped, attr)
|
75
|
-
|
76
|
+
|
76
77
|
# ColorInteractiveConsole: a subclass that temporarily restores the original stdout
|
77
78
|
# while reading input and prints an extra blank line before each prompt.
|
78
79
|
class ColorInteractiveConsole(code.InteractiveConsole):
|
@@ -86,37 +87,36 @@ def interactive_shell(local_vars: Dict[str, Any]) -> None:
|
|
86
87
|
finally:
|
87
88
|
sys.stdout = saved_stdout
|
88
89
|
return line
|
89
|
-
|
90
|
+
|
90
91
|
# Ensure local_vars is a dictionary.
|
91
92
|
if not isinstance(local_vars, dict):
|
92
93
|
raise TypeError("local_vars must be a dictionary")
|
93
|
-
|
94
|
+
|
94
95
|
# Initialize readline history.
|
95
96
|
setup_readline()
|
96
|
-
|
97
|
+
|
97
98
|
# Merge globals into local_vars.
|
98
99
|
local_vars.update(globals())
|
99
|
-
|
100
|
+
|
100
101
|
# Wrap ANSI escape codes with markers (\001 and \002) so readline ignores these in prompt length.
|
101
102
|
sys.ps1 = "\001" + WHITE + "\002" + ">>> " + "\001" + RESET + "\002"
|
102
103
|
sys.ps2 = "\001" + WHITE + "\002" + "... " + "\001" + RESET + "\002"
|
103
|
-
|
104
|
+
|
104
105
|
# Replace sys.stdout with BlueStdout to ensure all output is printed in blue.
|
105
106
|
sys.stdout = BlueStdout(sys.__stdout__)
|
106
|
-
|
107
|
+
|
107
108
|
# Create our custom interactive console.
|
108
109
|
console = ColorInteractiveConsole(locals=local_vars)
|
109
|
-
|
110
|
+
|
110
111
|
# The welcome banner is explicitly wrapped in BLUE and RESET.
|
111
112
|
banner = (BLUE +
|
112
113
|
"Welcome to the rgwfuncs interactive shell.\n"
|
113
114
|
"Use up/down arrows for command history.\n"
|
114
115
|
"Type 'exit()' or Ctrl+D to quit." +
|
115
116
|
RESET)
|
116
|
-
|
117
|
+
|
117
118
|
# Call interact with an empty exit message.
|
118
119
|
try:
|
119
120
|
console.interact(banner=banner, exitmsg="")
|
120
121
|
except SystemExit:
|
121
122
|
pass
|
122
|
-
|
rgwfuncs/str_lib.py
CHANGED
@@ -1,32 +1,49 @@
|
|
1
1
|
import os
|
2
2
|
import json
|
3
3
|
import requests
|
4
|
-
from typing import Tuple
|
4
|
+
from typing import Tuple, Optional, Union, Dict
|
5
5
|
import warnings
|
6
6
|
|
7
7
|
# Suppress all FutureWarnings
|
8
8
|
warnings.filterwarnings("ignore", category=FutureWarning)
|
9
9
|
|
10
10
|
|
11
|
-
def send_telegram_message(preset_name: str, message: str) -> None:
|
11
|
+
def send_telegram_message(preset_name: str, message: str, config: Optional[Union[str, dict]] = None) -> None:
|
12
12
|
"""
|
13
13
|
Send a Telegram message using the specified preset.
|
14
14
|
|
15
15
|
Args:
|
16
16
|
preset_name (str): The name of the preset to use for sending the message.
|
17
17
|
message (str): The message to send.
|
18
|
+
config (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
19
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
20
|
+
- str: Path to a JSON configuration file
|
21
|
+
- dict: Direct configuration dictionary
|
18
22
|
|
19
23
|
Raises:
|
20
24
|
RuntimeError: If the preset is not found or necessary details are missing.
|
21
25
|
"""
|
22
26
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
def get_config(config: Optional[Union[str, dict]] = None) -> dict:
|
28
|
+
"""Get telegram configuration either from a path or direct dictionary."""
|
29
|
+
def get_config_from_file(config_path: str) -> dict:
|
30
|
+
"""Load configuration from a JSON file."""
|
31
|
+
with open(config_path, 'r') as file:
|
32
|
+
return json.load(file)
|
33
|
+
|
34
|
+
# Determine the config to use
|
35
|
+
if config is None:
|
36
|
+
# Default to ~/.rgwfuncsrc if no config provided
|
37
|
+
config_path = os.path.expanduser('~/.rgwfuncsrc')
|
38
|
+
return get_config_from_file(config_path)
|
39
|
+
elif isinstance(config, str):
|
40
|
+
# If config is a string, treat it as a path and load it
|
41
|
+
return get_config_from_file(config)
|
42
|
+
elif isinstance(config, dict):
|
43
|
+
# If config is already a dict, use it directly
|
44
|
+
return config
|
45
|
+
else:
|
46
|
+
raise ValueError("Config must be either a path string or a dictionary")
|
30
47
|
|
31
48
|
def get_telegram_preset(config: dict, preset_name: str) -> dict:
|
32
49
|
"""Get the Telegram preset configuration."""
|
@@ -53,9 +70,7 @@ def send_telegram_message(preset_name: str, message: str) -> None:
|
|
53
70
|
|
54
71
|
return bot_token, chat_id
|
55
72
|
|
56
|
-
|
57
|
-
config = load_config()
|
58
|
-
|
73
|
+
config = get_config(config)
|
59
74
|
# Get bot details from the configuration
|
60
75
|
bot_token, chat_id = get_telegram_bot_details(config, preset_name)
|
61
76
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.92
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -24,6 +24,7 @@ Requires-Dist: requests
|
|
24
24
|
Requires-Dist: slack-sdk
|
25
25
|
Requires-Dist: google-api-python-client
|
26
26
|
Requires-Dist: boto3
|
27
|
+
Dynamic: license-file
|
27
28
|
|
28
29
|
# RGWFUNCS
|
29
30
|
|
@@ -602,6 +603,10 @@ Send a message to a Telegram chat using a specified preset from your configurati
|
|
602
603
|
• Parameters:
|
603
604
|
- `preset_name` (str): The name of the preset to use for sending the message. This should match a preset in the configuration file.
|
604
605
|
- `message` (str): The message text that you want to send to the Telegram chat.
|
606
|
+
- `config` (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
607
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
608
|
+
- str: Path to a JSON configuration file
|
609
|
+
- dict: Direct configuration dictionary
|
605
610
|
|
606
611
|
• Raises:
|
607
612
|
- `RuntimeError`: If the preset is not found in the configuration file or if necessary details (bot token or chat ID) are missing.
|
@@ -864,6 +869,10 @@ Load data from a specified database using a SQL query and return the results in
|
|
864
869
|
|
865
870
|
- `db_preset_name` (str): The name of the database preset found in the configuration file. This preset determines which database connection details to use.
|
866
871
|
- `query` (str): The SQL query string to be executed on the database.
|
872
|
+
- `config` (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
873
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
874
|
+
- str: Path to a JSON configuration file
|
875
|
+
- dict: Direct configuration dictionary
|
867
876
|
|
868
877
|
#### Returns
|
869
878
|
|
@@ -1127,6 +1136,10 @@ Send a DataFrame via Telegram using a specified bot configuration.
|
|
1127
1136
|
- message (str)
|
1128
1137
|
- `as_file` (bool)
|
1129
1138
|
- `remove_after_send` (bool)
|
1139
|
+
- `config` (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
1140
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
1141
|
+
- str: Path to a JSON configuration file
|
1142
|
+
- dict: Direct configuration dictionary
|
1130
1143
|
|
1131
1144
|
• Example:
|
1132
1145
|
|
@@ -1156,6 +1169,10 @@ Send an email with an optional DataFrame attachment using the Gmail API via a sp
|
|
1156
1169
|
- body (str, optional)
|
1157
1170
|
- `as_file` (bool)
|
1158
1171
|
- `remove_after_send` (bool)
|
1172
|
+
- `config` (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
1173
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
1174
|
+
- str: Path to a JSON configuration file
|
1175
|
+
- dict: Direct configuration dictionary
|
1159
1176
|
|
1160
1177
|
• Example:
|
1161
1178
|
|
@@ -1184,6 +1201,10 @@ Send a DataFrame or message to Slack using a specified bot configuration.
|
|
1184
1201
|
- message (str)
|
1185
1202
|
- `as_file` (bool)
|
1186
1203
|
- `remove_after_send` (bool)
|
1204
|
+
- `config` (Optional[Union[str, dict]], optional): Configuration source. Can be:
|
1205
|
+
- None: Uses default path '~/.rgwfuncsrc'
|
1206
|
+
- str: Path to a JSON configuration file
|
1207
|
+
- dict: Direct configuration dictionary
|
1187
1208
|
|
1188
1209
|
• Example:
|
1189
1210
|
|
@@ -1228,7 +1249,7 @@ Append a ranged classification column to the DataFrame.
|
|
1228
1249
|
|
1229
1250
|
• Parameters:
|
1230
1251
|
- df (pd.DataFrame)
|
1231
|
-
- ranges (
|
1252
|
+
- ranges (list[int | float]): List of numeric range boundaries (e.g., [0, 10, 20, 30]), last bin extends to infinity.
|
1232
1253
|
- `target_col` (str): The column to classify.
|
1233
1254
|
- `new_col_name` (str): Name of the new classification column.
|
1234
1255
|
|
@@ -1236,14 +1257,14 @@ Append a ranged classification column to the DataFrame.
|
|
1236
1257
|
- pd.DataFrame
|
1237
1258
|
|
1238
1259
|
• Example:
|
1239
|
-
|
1260
|
+
|
1240
1261
|
from rgwfuncs import append_ranged_classification_column
|
1241
1262
|
import pandas as pd
|
1242
1263
|
|
1243
1264
|
df = pd.DataFrame({'Scores': [5, 12, 25]})
|
1244
|
-
df_classified = append_ranged_classification_column(df,
|
1265
|
+
df_classified = append_ranged_classification_column(df, [0, 10, 20, 30], 'Scores', 'ScoreRange')
|
1245
1266
|
print(df_classified)
|
1246
|
-
|
1267
|
+
|
1247
1268
|
|
1248
1269
|
--------------------------------------------------------------------------------
|
1249
1270
|
|
@@ -1252,7 +1273,7 @@ Append a percentile classification column to the DataFrame.
|
|
1252
1273
|
|
1253
1274
|
• Parameters:
|
1254
1275
|
- df (pd.DataFrame)
|
1255
|
-
- percentiles (
|
1276
|
+
- percentiles (list[int | float]): List of percentile values (0-100, e.g., [25, 50, 75]).
|
1256
1277
|
- `target_col` (str)
|
1257
1278
|
- `new_col_name` (str)
|
1258
1279
|
|
@@ -1260,14 +1281,14 @@ Append a percentile classification column to the DataFrame.
|
|
1260
1281
|
- pd.DataFrame
|
1261
1282
|
|
1262
1283
|
• Example:
|
1263
|
-
|
1284
|
+
|
1264
1285
|
from rgwfuncs import append_percentile_classification_column
|
1265
1286
|
import pandas as pd
|
1266
1287
|
|
1267
1288
|
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
|
1268
|
-
df_classified = append_percentile_classification_column(df,
|
1289
|
+
df_classified = append_percentile_classification_column(df, [25, 50, 75], 'Values', 'ValuePercentile')
|
1269
1290
|
print(df_classified)
|
1270
|
-
|
1291
|
+
|
1271
1292
|
|
1272
1293
|
--------------------------------------------------------------------------------
|
1273
1294
|
|
@@ -1276,7 +1297,7 @@ Append a ranged date classification column to the DataFrame.
|
|
1276
1297
|
|
1277
1298
|
• Parameters:
|
1278
1299
|
- df (pd.DataFrame)
|
1279
|
-
- `date_ranges` (str):
|
1300
|
+
- `date_ranges` (list[str]): List of date strings in a format pandas can parse (e.g., ['2020-01-01', '2020-06-30', '2020-12-31']).
|
1280
1301
|
- `target_col` (str)
|
1281
1302
|
- `new_col_name` (str)
|
1282
1303
|
|
@@ -1284,19 +1305,18 @@ Append a ranged date classification column to the DataFrame.
|
|
1284
1305
|
- pd.DataFrame
|
1285
1306
|
|
1286
1307
|
• Example:
|
1287
|
-
|
1308
|
+
|
1288
1309
|
from rgwfuncs import append_ranged_date_classification_column
|
1289
1310
|
import pandas as pd
|
1290
1311
|
|
1291
|
-
df = pd.DataFrame({'EventDate': pd.to_datetime(['2020-03-15','2020-08-10'])})
|
1312
|
+
df = pd.DataFrame({'EventDate': pd.to_datetime(['2020-03-15', '2020-08-10'])})
|
1292
1313
|
df_classified = append_ranged_date_classification_column(
|
1293
1314
|
df,
|
1294
|
-
'2020-01-
|
1315
|
+
['2020-01-01', '2020-06-30', '2020-12-31'],
|
1295
1316
|
'EventDate',
|
1296
1317
|
'DateRange'
|
1297
1318
|
)
|
1298
1319
|
print(df_classified)
|
1299
|
-
|
1300
1320
|
|
1301
1321
|
--------------------------------------------------------------------------------
|
1302
1322
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
|
+
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
+
rgwfuncs/df_lib.py,sha256=uhP5qv1PTBNTuZSzUe_-Qwwtm20rPU8JpEQa8OEetHk,75555
|
4
|
+
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
5
|
+
rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
|
6
|
+
rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
|
7
|
+
rgwfuncs-0.0.92.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.92.dist-info/METADATA,sha256=Vx7bicfYGVHY2ER5s4gpjDdNsVYsfQx6_2kbLGS6EVU,61443
|
9
|
+
rgwfuncs-0.0.92.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
10
|
+
rgwfuncs-0.0.92.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.92.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.92.dist-info/RECORD,,
|
rgwfuncs-0.0.90.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
|
-
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=r6T-MwyDq9NAPW1Xf6NzSy7ZFicIKdemR-UKu6TZt5g,71111
|
4
|
-
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
5
|
-
rgwfuncs/interactive_shell_lib.py,sha256=F9Kul06SK-X1DxFpINDLEHFab7UaqFHgx2eYmdmMoOg,4393
|
6
|
-
rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
|
7
|
-
rgwfuncs-0.0.90.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
-
rgwfuncs-0.0.90.dist-info/METADATA,sha256=VqTLa3ss_JZSpa0DWB8pOgCitIKCuSL7xIl02JjDShk,60288
|
9
|
-
rgwfuncs-0.0.90.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
-
rgwfuncs-0.0.90.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
-
rgwfuncs-0.0.90.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
-
rgwfuncs-0.0.90.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|