rgwfuncs 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/__init__.py +2 -1
- rgwfuncs/df_lib.py +414 -158
- rgwfuncs/str_lib.py +62 -0
- {rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/METADATA +59 -20
- rgwfuncs-0.0.18.dist-info/RECORD +9 -0
- rgwfuncs-0.0.16.dist-info/RECORD +0 -8
- {rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/top_level.txt +0 -0
rgwfuncs/__init__.py
CHANGED
@@ -1,4 +1,5 @@
 # This file is automatically generated
 # Dynamically importing functions from modules

-from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
+from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
+from .str_lib import send_telegram_message
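The practical effect of the __init__.py change is that both functions added in 0.0.18 are exposed at the package root. A quick sketch, assuming the wheel is installed:

    import rgwfuncs

    # Both 0.0.18 additions are importable from the package root
    print(callable(rgwfuncs.insert_dataframe_in_sqlite_database))  # True
    print(callable(rgwfuncs.send_telegram_message))                # True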
rgwfuncs/df_lib.py
CHANGED
@@ -28,6 +28,7 @@ import warnings
 # Suppress all FutureWarnings
 warnings.filterwarnings("ignore", category=FutureWarning)

+
 def docs(method_type_filter: Optional[str] = None) -> None:
     """
     Print a list of function names in alphabetical order. If method_type_filter
@@ -65,7 +66,11 @@ def docs(method_type_filter: Optional[str] = None) -> None:
             print(f"\n{name}:\n{docstring}")


-def numeric_clean(
+def numeric_clean(
+        df: pd.DataFrame,
+        column_names: str,
+        column_type: str,
+        irregular_value_treatment: str) -> pd.DataFrame:
     """
     Cleans the numeric columns based on specified treatments.

@@ -296,7 +301,9 @@ def drop_duplicates(df: pd.DataFrame) -> pd.DataFrame:
     return df.drop_duplicates(keep='first')


-def drop_duplicates_retain_first(
+def drop_duplicates_retain_first(
+        df: pd.DataFrame,
+        columns: Optional[str] = None) -> pd.DataFrame:
     """
     Drop duplicate rows in the DataFrame based on specified columns, retaining the first occurrence.

@@ -318,7 +325,9 @@ def drop_duplicates_retain_first(df: pd.DataFrame, columns: Optional[str] = None
     return df.drop_duplicates(subset=columns_list, keep='first')


-def drop_duplicates_retain_last(
+def drop_duplicates_retain_last(
+        df: pd.DataFrame,
+        columns: Optional[str] = None) -> pd.DataFrame:
     """
     Drop duplicate rows in the DataFrame based on specified columns, retaining the last occurrence.

@@ -335,20 +344,18 @@ def drop_duplicates_retain_last(df: pd.DataFrame, columns: Optional[str] = None)
     if df is None:
         raise ValueError("DataFrame is not initialized.")

-    columns_list = [col.strip()
+    columns_list = [col.strip()
+                    for col in columns.split(',')] if columns else None
     return df.drop_duplicates(subset=columns_list, keep='last')


-def load_data_from_query(db_preset_name: str, query: str
+def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
     """
-    Load data from a database query into a DataFrame based on a configuration
-    preset.
+    Load data from a database query into a DataFrame based on a configuration preset.

     Parameters:
         db_preset_name: The name of the database preset in the configuration file.
         query: The SQL query to execute.
-        config_file_name: Name of the configuration file
-        (default: 'rgwml.config').

     Returns:
         A DataFrame containing the query result.
@@ -358,17 +365,6 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
         ValueError: If the database preset or db_type is invalid.
     """

-    def locate_config_file(filename: str = config_file_name) -> str:
-        home_dir = os.path.expanduser("~")
-        search_paths = [os.path.join(home_dir, "Desktop"), os.path.join(home_dir, "Documents"), os.path.join(home_dir, "Downloads"),]
-
-        for path in search_paths:
-            for root, dirs, files in os.walk(path):
-                if filename in files:
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(
-            f"{filename} not found in Desktop, Documents, or Downloads folders")
-
     def query_mssql(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
         server = db_preset['host']
         user = db_preset['username']
@@ -393,12 +389,13 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
         with conn.cursor() as cursor:
             cursor.execute(query)
             rows = cursor.fetchall()
-            columns = ([desc[0] for desc in cursor.description]
+            columns = ([desc[0] for desc in cursor.description]
+                       if cursor.description else [])

         return pd.DataFrame(rows, columns=columns)

-    def query_clickhouse(
-
+    def query_clickhouse(
+            db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
         host = db_preset['host']
         user = db_preset['username']
         password = db_preset['password']
@@ -409,7 +406,8 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str

         for attempt in range(max_retries):
             try:
-                client = clickhouse_connect.get_client(
+                client = clickhouse_connect.get_client(
+                    host=host, port='8123', username=user, password=password, database=database)
                 data = client.query(query)
                 rows = data.result_rows
                 columns = data.column_names
@@ -423,11 +421,13 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
         raise ConnectionError(
             "All attempts to connect to ClickHouse failed.")

-    def query_google_big_query(
+    def query_google_big_query(
+            db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
         json_file_path = db_preset['json_file_path']
         project_id = db_preset['project_id']

-        credentials = service_account.Credentials.from_service_account_file(
+        credentials = service_account.Credentials.from_service_account_file(
+            json_file_path)
         client = bigquery.Client(credentials=credentials, project=project_id)

         query_job = client.query(query)
@@ -437,13 +437,15 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str

         return pd.DataFrame(rows, columns=columns)

-    #
-    config_path =
+    # Assume the configuration file is located at ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     with open(config_path, 'r') as f:
         config = json.load(f)

     db_presets = config.get('db_presets', [])
-    db_preset = next(
+    db_preset = next(
+        (preset for preset in db_presets if preset['name'] == db_preset_name),
+        None)
     if not db_preset:
         raise ValueError(f"No matching db_preset found for {db_preset_name}")

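The recurring change in the hunks above and below is that 0.0.18 stops searching the Desktop, Documents, and Downloads folders for a configuration file and instead reads a fixed `~/.rgwfuncsrc` path. A minimal sketch of the new lookup, assuming the JSON layout with a `db_presets` list shown in the METADATA section; the preset name is illustrative:

    import json
    import os

    # 0.0.18 behaviour: read the config from a fixed path instead of walking folders
    config_path = os.path.expanduser('~/.rgwfuncsrc')
    with open(config_path, 'r') as f:
        config = json.load(f)

    # Presets are then looked up by name, as load_data_from_query does
    db_preset = next(
        (p for p in config.get('db_presets', []) if p['name'] == 'MyDBPreset'),
        None)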
@@ -621,10 +623,20 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
         for column in columns:
             if column in df.columns:
                 frequency = df[column].astype(str).value_counts(dropna=False)
-                frequency = frequency.rename(
+                frequency = frequency.rename(
+                    index={
+                        'nan': 'NaN',
+                        'NaT': 'NaT',
+                        'None': 'None',
+                        '': 'Empty'})
                 top_n_values = frequency.nlargest(n)
-                report[column] = {str(value): str(count)
-
+                report[column] = {str(value): str(count)
+                                  for value, count in top_n_values.items()}
+                print(
+                    f"Top {n} unique values for column '{column}':\n{
+                        json.dumps(
+                            report[column],
+                            indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
     else:
@@ -634,7 +646,10 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
     gc.collect()


-def bottom_n_unique_values(
+def bottom_n_unique_values(
+        df: pd.DataFrame,
+        n: int,
+        columns: List[str]) -> None:
     """
     Print the bottom `n` unique values for specified columns in the DataFrame.

@@ -654,12 +669,21 @@ def bottom_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None
         for column in columns:
             if column in df.columns:
                 frequency = df[column].astype(str).value_counts(dropna=False)
-                frequency = frequency.rename(
+                frequency = frequency.rename(
+                    index={
+                        'nan': 'NaN',
+                        'NaT': 'NaT',
+                        'None': 'None',
+                        '': 'Empty'})
                 bottom_n_values = frequency.nsmallest(n)
                 report[column] = {
                     str(value): str(count) for value,
                     count in bottom_n_values.items()}
-                print(
+                print(
+                    f"Bottom {n} unique values for column '{column}':\n{
+                        json.dumps(
+                            report[column],
+                            indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
     else:
@@ -669,7 +693,8 @@ def bottom_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None
     gc.collect()


-def print_correlation(
+def print_correlation(
+        df: pd.DataFrame, column_pairs: List[Tuple[str, str]]) -> None:
     """
     Print correlation for multiple pairs of columns in the DataFrame.

@@ -688,13 +713,16 @@ def print_correlation(df: pd.DataFrame, column_pairs: List[Tuple[str, str]]) ->

                     correlation = numeric_col1.corr(numeric_col2)
                     if pd.notnull(correlation):
-                        print(
+                        print(
+                            f"The correlation between '{col1}' and '{col2}' is {correlation}.")
                     else:
-                        print(
+                        print(
+                            f"Cannot calculate correlation between '{col1}' and '{col2}' due to insufficient numeric data.")
                 except Exception as e:
                     print(f"Error processing cols '{col1}' and '{col2}': {e}")
             else:
-                print(
+                print(
+                    f"One or both of the specified cols ('{col1}', '{col2}') do not exist in the DataFrame.")
     else:
         print("The DataFrame is empty.")

@@ -714,7 +742,8 @@ def print_memory_usage(df: pd.DataFrame) -> None:
     - ValueError: If the DataFrame is `None`.
     """
     if df is not None:
-        memory_usage = df.memory_usage(deep=True).sum(
+        memory_usage = df.memory_usage(deep=True).sum(
+        ) / (1024 * 1024)  # Convert bytes to MB
         print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
     else:
         raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -795,7 +824,8 @@ def print_dataframe(df: pd.DataFrame, source: Optional[str] = None) -> None:
     """
     if df is not None:
         print(df)
-        columns_with_types = [
+        columns_with_types = [
+            f"{col} ({df[col].dtypes})" for col in df.columns]
         print("Columns:", columns_with_types)
         if source:
             print(f"Source: {source}")
@@ -811,48 +841,53 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option

     Parameters:
         df: The DataFrame to send.
-        bot_name: The name of the Telegram bot as specified in the configuration.
-        message: Custom message to send along with the DataFrame or file.
-        as_file: Boolean flag to
-        remove_after_send: If True, removes the file after sending.
-    """
+        bot_name: The name of the Telegram bot as specified in the configuration file.
+        message: Custom message to send along with the DataFrame or file. Defaults to None.
+        as_file: Boolean flag to indicate whether the DataFrame should be sent as a file (True) or as text (False). Defaults to True.
+        remove_after_send: If True, removes the CSV file after sending. Defaults to True.

-
-
-
-        search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+    Raises:
+        ValueError: If the specified bot is not found or if no DataFrame is provided.
+        Exception: If the message sending fails.

-
-
-
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(
-            f"{filename} not found in Desktop, Documents, or Downloads")
+    Notes:
+        The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+    """

     def get_config(config_path: str) -> dict:
-        """Load configuration from a
+        """Load configuration from a JSON file."""
         with open(config_path, 'r') as file:
             return json.load(file)

-
+    # Assume the configuration file is located at ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     config = get_config(config_path)
-    bot_config = next((bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name), None)

+    bot_config = next(
+        (bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name),
+        None)
     if not bot_config:
         raise ValueError(f"No bot found with the name {bot_name}")

     if df is None:
         raise ValueError("No DataFrame to send. Please provide a DataFrame.")

+    response = None
     if as_file:
         timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
         file_name = f"df_{timestamp}.csv"
         df.to_csv(file_name, index=False)
         try:
             with open(file_name, 'rb') as file:
-                payload = {
+                payload = {
+                    'chat_id': bot_config['chat_id'],
+                    'caption': message or ''}
                 files = {'document': file}
-                response = requests.post(
+                response = requests.post(
+                    f"https://api.telegram.org/bot{
+                        bot_config['bot_token']}/sendDocument",
+                    data=payload,
+                    files=files)
             if remove_after_send and os.path.exists(file_name):
                 os.remove(file_name)
         except Exception as e:
@@ -862,40 +897,45 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option
         df_str = df.to_string()
         payload = {
             'chat_id': bot_config['chat_id'],
-            'text': message + "\n\n" + df_str if message else df_str,
-            'parse_mode': 'HTML'
-
+            'text': (message + "\n\n" + df_str) if message else df_str,
+            'parse_mode': 'HTML'
+        }
+        response = requests.post(
+            f"https://api.telegram.org/bot{bot_config['bot_token']}/sendMessage", data=payload)

-    if not response.ok:
+    if response and not response.ok:
         raise Exception(f"Error sending message: {response.text}")

     print("Message sent successfully.")


-def send_data_to_email(
+def send_data_to_email(
+        df: pd.DataFrame,
+        preset_name: str,
+        to_email: str,
+        subject: Optional[str] = None,
+        body: Optional[str] = None,
+        as_file: bool = True,
+        remove_after_send: bool = True) -> None:
     """
-    Send an email with optional DataFrame attachment using Gmail API via a specified preset.
+    Send an email with an optional DataFrame attachment using the Gmail API via a specified preset.

     Parameters:
         df: The DataFrame to send.
         preset_name: The configuration preset name to use for sending the email.
         to_email: The recipient email address.
-        subject: Optional subject of the email.
-        body: Optional message body of the email.
-        as_file: Boolean flag to decide whether to send the DataFrame as a file.
-        remove_after_send: If True, removes the CSV file after sending.
-    """
+        subject: Optional subject of the email. Defaults to 'DataFrame CSV File' if not given.
+        body: Optional message body of the email. Defaults to 'Please find the CSV file attached.' if not given.
+        as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or embed it in the email (False). Defaults to True.
+        remove_after_send: If True, removes the CSV file after sending. Defaults to True.

-
-
-
-        search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+    Raises:
+        ValueError: If the preset is not found in the configuration.
+        Exception: If the email preparation or sending fails.

-
-
-
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(f"{filename} not found in Desktop, Documents, or Downloads folders")
+    Notes:
+        The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+    """

     def get_config(config_path: str) -> dict:
         with open(config_path, 'r') as file:
@@ -914,12 +954,14 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
         )
         return build('gmail', 'v1', credentials=credentials)

-    # Load configuration
-    config_path =
+    # Load configuration from ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     config = get_config(config_path)

     # Retrieve Gmail preset configuration
-    gmail_config = next(
+    gmail_config = next(
+        (preset for preset in config['gmail_bot_presets'] if preset['name'] == preset_name),
+        None)

     if not gmail_config:
         raise ValueError(f"No preset found with the name {preset_name}")
@@ -942,13 +984,18 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
         message['to'] = to_email
         message['from'] = sender_email
         message['subject'] = subject if subject else 'DataFrame CSV File'
-        message.attach(
+        message.attach(
+            MIMEText(
+                body if body else 'Please find the CSV file attached.'))

         with open(tmp_file_name, 'rb') as file:
             part = MIMEBase('application', 'octet-stream')
             part.set_payload(file.read())
             encoders.encode_base64(part)
-            part.add_header(
+            part.add_header(
+                'Content-Disposition',
+                f'attachment; filename={
+                    os.path.basename(tmp_file_name)}')
             message.attach(part)

         if remove_after_send and os.path.exists(tmp_file_name):
@@ -970,46 +1017,49 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
     try:
         raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
         email_body = {'raw': raw}
-        sent_message = service.users().messages().send(
+        sent_message = service.users().messages().send(
+            userId="me", body=email_body).execute()
         print(f"Email with Message Id {sent_message['id']} successfully sent.")
     except Exception as error:
         raise Exception(f"Error sending email: {error}")


-def send_data_to_slack(
+def send_data_to_slack(
+        df: pd.DataFrame,
+        bot_name: str,
+        message: Optional[str] = None,
+        as_file: bool = True,
+        remove_after_send: bool = True) -> None:
     """
     Send a DataFrame or message to Slack using a specified bot configuration.

     Parameters:
         df: The DataFrame to send.
         bot_name: The Slack bot configuration preset name.
-        message: Custom message to send along with the DataFrame or file.
-        as_file: Boolean flag to decide whether to send the DataFrame as a file.
-        remove_after_send: If True, removes the CSV file after sending.
-    """
+        message: Custom message to send along with the DataFrame or file. Defaults to None.
+        as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or as text (False). Defaults to True.
+        remove_after_send: If True, removes the CSV file after sending. Defaults to True.

-
-
-
-        search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+    Raises:
+        ValueError: If the specified bot is not found in the configuration.
+        Exception: If the message sending fails.

-
-
-
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(
-            f"{filename} not found in Desktop, Documents, or Downloads folders")
+    Notes:
+        The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+    """

     def get_config(config_path: str) -> dict:
         """Load configuration from a JSON file."""
         with open(config_path, 'r') as file:
             return json.load(file)

-    # Load the Slack configuration
-    config_path =
+    # Load the Slack configuration from ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     config = get_config(config_path)

-    bot_config = next(
+    bot_config = next(
+        (bot for bot in config['slack_bot_presets'] if bot['name'] == bot_name),
+        None)

     if not bot_config:
         raise ValueError(f"No bot found with the name {bot_name}")
@@ -1024,13 +1074,22 @@ def send_data_to_slack(df: pd.DataFrame, bot_name: str, message: Optional[str] =

         try:
             with open(file_name, 'rb') as file:
-                response = client.files_upload(
+                response = client.files_upload(
+                    channels=bot_config['channel_id'],
+                    file=file,
+                    filename=os.path.basename(file_name),
+                    title="DataFrame Upload",
+                    initial_comment=message or ''
+                )
         finally:
             if remove_after_send and os.path.exists(file_name):
                 os.remove(file_name)
     else:
         df_str = df.to_string()
-        response = client.chat_postMessage(
+        response = client.chat_postMessage(
+            channel=bot_config['channel_id'],
+            text=(message + "\n\n" + df_str) if message else df_str
+        )

     # Check if the message was sent successfully
     if not response["ok"]:
@@ -1087,7 +1146,11 @@ def order_columns(df: pd.DataFrame, column_order_str: str) -> pd.DataFrame:
     return df[new_order]


-def append_ranged_classification_column(
+def append_ranged_classification_column(
+        df: pd.DataFrame,
+        ranges: str,
+        target_col: str,
+        new_col_name: str) -> pd.DataFrame:
     """
     Append a ranged classification column to the DataFrame.

@@ -1155,16 +1218,27 @@ def append_ranged_classification_column(df: pd.DataFrame, ranges: str, target_co
         for r in range_list
     )

-    labels = [f"{pad_number(range_list[i],
+    labels = [f"{pad_number(range_list[i],
+                            max_integer_length)} to {pad_number(range_list[i + 1],
+                                                                max_integer_length)}" for i in range(len(range_list) - 1)]

     # Ensure the target column is numeric
     df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
-    df[new_col_name] = pd.cut(
+    df[new_col_name] = pd.cut(
+        df[target_col],
+        bins=range_list,
+        labels=labels,
+        right=False,
+        include_lowest=True)

     return df


-def append_percentile_classification_column(
+def append_percentile_classification_column(
+        df: pd.DataFrame,
+        percentiles: str,
+        target_col: str,
+        new_col_name: str) -> pd.DataFrame:
     """
     Append a percentile classification column to the DataFrame.

@@ -1192,14 +1266,21 @@ def append_percentile_classification_column(df: pd.DataFrame, percentiles: str,

     if has_decimals:
         percentiles_list = [float(p) for p in percentiles_list]
-        max_decimal_length = max(
-
+        max_decimal_length = max(
+            len(str(p).split('.')[1]) for p in percentiles_list if '.' in str(p))
+        max_integer_length = max(len(str(int(float(p))))
+                                 for p in percentiles_list)

         labels = []

         for i in range(len(percentiles_list) - 1):
-            start = pad_number(
-
+            start = pad_number(
+                percentiles_list[i],
+                max_integer_length,
+                max_decimal_length,
+                decimal=True)
+            end = pad_number(
+                percentiles_list[i + 1], max_integer_length, max_decimal_length, decimal=True)

             label = f"{start} to {end}"
             labels.append(label)
@@ -1222,12 +1303,20 @@ def append_percentile_classification_column(df: pd.DataFrame, percentiles: str,
     df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
     quantiles = [df[target_col].quantile(p / 100) for p in percentiles_list]

-    df[new_col_name] = pd.cut(
+    df[new_col_name] = pd.cut(
+        df[target_col],
+        bins=quantiles,
+        labels=labels,
+        include_lowest=True)

     return df


-def append_ranged_date_classification_column(
+def append_ranged_date_classification_column(
+        df: pd.DataFrame,
+        date_ranges: str,
+        target_col: str,
+        new_col_name: str) -> pd.DataFrame:
     """
     Append a ranged date classification column to the DataFrame.

@@ -1260,7 +1349,9 @@ def append_ranged_date_classification_column(df: pd.DataFrame, date_ranges: str,
     return df


-def rename_columns(df: pd.DataFrame,
+def rename_columns(df: pd.DataFrame,
+                   rename_pairs: Dict[str,
+                                      str]) -> pd.DataFrame:
     """
     Rename columns in the DataFrame.

@@ -1272,7 +1363,8 @@ def rename_columns(df: pd.DataFrame, rename_pairs: Dict[str, str]) -> pd.DataFra
         A new DataFrame with columns renamed.
     """
     if df is None:
-        raise ValueError(
+        raise ValueError(
+            "No DataFrame to rename columns. Please provide a valid DataFrame.")

     return df.rename(columns=rename_pairs)

@@ -1290,7 +1382,8 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
         A new DataFrame sorted by specified columns.
     """
     if df is None:
-        raise ValueError(
+        raise ValueError(
+            "No DataFrame to sort. Please provide a valid DataFrame.")

     col_names = []
     asc_order = []
@@ -1325,7 +1418,8 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         A new DataFrame with XGB_TYPE labels appended.
     """
     if df is None:
-        raise ValueError(
+        raise ValueError(
+            "No DataFrame to add labels. Please provide a valid DataFrame.")

     ratios = list(map(int, ratio_str.split(':')))
     total_ratio = sum(ratios)
@@ -1342,7 +1436,8 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
             validate_rows + ['TEST'] * test_rows
     else:
-        raise ValueError(
+        raise ValueError(
+            "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")

     df_with_labels = df.copy()
     df_with_labels['XGB_TYPE'] = labels
@@ -1350,7 +1445,13 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
     return df_with_labels


-def append_xgb_regression_predictions(
+def append_xgb_regression_predictions(
+        df: pd.DataFrame,
+        target_col: str,
+        feature_cols: str,
+        pred_col: str,
+        boosting_rounds: int = 100,
+        model_path: Optional[str] = None) -> pd.DataFrame:
     """
     Append XGB regression predictions to DataFrame. Assumes data is labeled by an 'XGB_TYPE' column.

@@ -1366,7 +1467,8 @@ def append_xgb_regression_predictions(df: pd.DataFrame, target_col: str, feature
         DataFrame with predictions appended.
     """
     if df is None or 'XGB_TYPE' not in df.columns:
-        raise ValueError(
+        raise ValueError(
+            "DataFrame is not initialized or 'XGB_TYPE' column is missing.")

     features = feature_cols.replace(' ', '').split(',')

@@ -1382,16 +1484,27 @@ def append_xgb_regression_predictions(df: pd.DataFrame, target_col: str, feature
     else:
         validate_data = None

-    dtrain = xgb.DMatrix(
+    dtrain = xgb.DMatrix(
+        train_data[features],
+        label=train_data[target_col],
+        enable_categorical=True)
     evals = [(dtrain, 'train')]

     if validate_data is not None:
-        dvalidate = xgb.DMatrix(
+        dvalidate = xgb.DMatrix(
+            validate_data[features],
+            label=validate_data[target_col],
+            enable_categorical=True)
         evals.append((dvalidate, 'validate'))

     params = {'objective': 'reg:squarederror', 'eval_metric': 'rmse'}

-    model = xgb.train(
+    model = xgb.train(
+        params,
+        dtrain,
+        num_boost_round=boosting_rounds,
+        evals=evals,
+        early_stopping_rounds=10 if validate_data is not None else None)

     # Make predictions for all data
     dall = xgb.DMatrix(df[features], enable_categorical=True)
@@ -1400,13 +1513,20 @@ def append_xgb_regression_predictions(df: pd.DataFrame, target_col: str, feature
     if model_path:
         model.save_model(model_path)

-    columns_order = [col for col in df.columns if col not in [
+    columns_order = [col for col in df.columns if col not in [
+        'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
     df = df[columns_order]

     return df


-def append_xgb_logistic_regression_predictions(
+def append_xgb_logistic_regression_predictions(
+        df: pd.DataFrame,
+        target_col: str,
+        feature_cols: str,
+        pred_col: str,
+        boosting_rounds: int = 100,
+        model_path: Optional[str] = None) -> pd.DataFrame:
     """
     Append XGB logistic regression predictions to DataFrame. Assumes data is labeled by an 'XGB_TYPE' column.

@@ -1438,16 +1558,27 @@ def append_xgb_logistic_regression_predictions(df: pd.DataFrame, target_col: str
     if 'VALIDATE' in df['XGB_TYPE'].values:
         validate_data = df[df['XGB_TYPE'] == 'VALIDATE']

-    dtrain = xgb.DMatrix(
+    dtrain = xgb.DMatrix(
+        train_data[features],
+        label=train_data[target_col],
+        enable_categorical=True)
     evals = [(dtrain, 'train')]

     if validate_data is not None:
-        dvalidate = xgb.DMatrix(
+        dvalidate = xgb.DMatrix(
+            validate_data[features],
+            label=validate_data[target_col],
+            enable_categorical=True)
         evals.append((dvalidate, 'validate'))

     params = {'objective': 'binary:logistic', 'eval_metric': 'auc'}

-    model = xgb.train(
+    model = xgb.train(
+        params,
+        dtrain,
+        num_boost_round=boosting_rounds,
+        evals=evals,
+        early_stopping_rounds=10 if validate_data is not None else None)

     # Make predictions for all data
     dall = xgb.DMatrix(df[features], enable_categorical=True)
@@ -1456,13 +1587,18 @@ def append_xgb_logistic_regression_predictions(df: pd.DataFrame, target_col: str
     if model_path:
         model.save_model(model_path)

-    columns_order = [col for col in df.columns if col not in [
+    columns_order = [col for col in df.columns if col not in [
+        'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
     df = df[columns_order]

     return df


-def print_n_frequency_cascading(
+def print_n_frequency_cascading(
+        df: pd.DataFrame,
+        n: int,
+        columns: str,
+        order_by: str = "FREQ_DESC") -> None:
     """
     Print the cascading frequency of top n values for specified columns.

@@ -1485,7 +1621,12 @@ def print_n_frequency_cascading(df: pd.DataFrame, n: int, columns: str, order_by
         # Convert the column to string representation
         df[current_col] = df[current_col].astype(str)
         frequency = df[current_col].value_counts(dropna=False)
-        frequency = frequency.rename(
+        frequency = frequency.rename(
+            index={
+                'nan': 'NaN',
+                'NaT': 'NaT',
+                'None': 'None',
+                '': 'Empty'})

         if limit is not None:
             frequency = frequency.nlargest(limit)
@@ -1500,8 +1641,11 @@ def print_n_frequency_cascading(df: pd.DataFrame, n: int, columns: str, order_by
             filtered_df = df[df[current_col] == value]

             if len(columns) > 1:
-                sub_report = generate_cascade_report(
-
+                sub_report = generate_cascade_report(
+                    filtered_df, columns[1:], limit, order_by)
+                report[value] = {
+                    "count": str(count), f"sub_distribution({
+                        columns[1]})": sub_report if sub_report else {}}
             else:
                 report[value] = {"count": str(count)}

@@ -1511,17 +1655,29 @@ def print_n_frequency_cascading(df: pd.DataFrame, n: int, columns: str, order_by
         if order_by == "ASC":
             return dict(sorted(frequency.items(), key=lambda item: item[0]))
         elif order_by == "DESC":
-            return dict(
+            return dict(
+                sorted(
+                    frequency.items(),
+                    key=lambda item: item[0],
+                    reverse=True))
         elif order_by == "FREQ_ASC":
             return dict(sorted(frequency.items(), key=lambda item: item[1]))
         else:  # Default to "FREQ_DESC"
-            return dict(
+            return dict(
+                sorted(
+                    frequency.items(),
+                    key=lambda item: item[1],
+                    reverse=True))

     report = generate_cascade_report(df, columns, n, order_by)
     print(json.dumps(report, indent=2))


-def print_n_frequency_linear(
+def print_n_frequency_linear(
+        df: pd.DataFrame,
+        n: int,
+        columns: str,
+        order_by: str = "FREQ_DESC") -> None:
     """
     Print the linear frequency of top n values for specified columns.

@@ -1541,13 +1697,19 @@ def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: str, order_by: s
             continue

         frequency = df[current_col].astype(str).value_counts(dropna=False)
-        frequency = frequency.rename(
+        frequency = frequency.rename(
+            index={
+                'nan': 'NaN',
+                'NaT': 'NaT',
+                'None': 'None',
+                '': 'Empty'})

         if limit is not None:
             frequency = frequency.nlargest(limit)

         sorted_frequency = sort_frequency(frequency, order_by)
-        col_report = {str(value): str(count)
+        col_report = {str(value): str(count)
+                      for value, count in sorted_frequency.items()}
         report[current_col] = col_report

     return report
@@ -1556,17 +1718,27 @@ def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: str, order_by: s
         if order_by == "ASC":
             return dict(sorted(frequency.items(), key=lambda item: item[0]))
         elif order_by == "DESC":
-            return dict(
+            return dict(
+                sorted(
+                    frequency.items(),
+                    key=lambda item: item[0],
+                    reverse=True))
         elif order_by == "FREQ_ASC":
             return dict(sorted(frequency.items(), key=lambda item: item[1]))
         else:  # Default to "FREQ_DESC"
-            return dict(
+            return dict(
+                sorted(
+                    frequency.items(),
+                    key=lambda item: item[1],
+                    reverse=True))

     report = generate_linear_report(df, columns, n, order_by)
     print(json.dumps(report, indent=2))


-def retain_columns(
+def retain_columns(
+        df: pd.DataFrame,
+        columns_to_retain: List[str]) -> pd.DataFrame:
     """
     Retain specified columns in the DataFrame and drop the others.

@@ -1582,7 +1754,10 @@ def retain_columns(df: pd.DataFrame, columns_to_retain: List[str]) -> pd.DataFra
     return df[columns_to_retain]


-def mask_against_dataframe(
+def mask_against_dataframe(
+        df: pd.DataFrame,
+        other_df: pd.DataFrame,
+        column_name: str) -> pd.DataFrame:
     """
     Retain only rows with common column values between two DataFrames.

@@ -1599,7 +1774,10 @@ def mask_against_dataframe(df: pd.DataFrame, other_df: pd.DataFrame, column_name
     return df[df[column_name].isin(other_df[column_name])]


-def mask_against_dataframe_converse(
+def mask_against_dataframe_converse(
+        df: pd.DataFrame,
+        other_df: pd.DataFrame,
+        column_name: str) -> pd.DataFrame:
     """
     Retain only rows with uncommon column values between two DataFrames.

@@ -1633,7 +1811,8 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
+        raise ValueError(
+            "Both DataFrames must have the same columns for a union join")

     result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
     return result_df
@@ -1654,13 +1833,18 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
+        raise ValueError(
+            "Both DataFrames must have the same columns for a bag union join")

     result_df = pd.concat([df1, df2], ignore_index=True)
     return result_df


-def left_join(
+def left_join(
+        df1: pd.DataFrame,
+        df2: pd.DataFrame,
+        left_on: str,
+        right_on: str) -> pd.DataFrame:
     """
     Perform a left join on two DataFrames.

@@ -1676,7 +1860,11 @@ def left_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str)
     return df1.merge(df2, how='left', left_on=left_on, right_on=right_on)


-def right_join(
+def right_join(
+        df1: pd.DataFrame,
+        df2: pd.DataFrame,
+        left_on: str,
+        right_on: str) -> pd.DataFrame:
     """
     Perform a right join on two DataFrames.

@@ -1692,7 +1880,72 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
     return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)


-def
+def insert_dataframe_in_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
+    """
+    Inserts a Pandas DataFrame into a SQLite database table.
+
+    Parameters:
+        db_path: str
+            The file path to the SQLite database. If the database does not exist,
+            it will be created.
+
+        tablename: str
+            The name of the table where the data will be inserted. If the table does
+            not exist, it will be created based on the DataFrame's columns and types.
+
+        df: pd.DataFrame
+            The DataFrame containing the data to be inserted into the database.
+
+    Functionality:
+        - Checks if the specified table exists in the database.
+        - Creates the table with appropriate column types if it doesn't exist.
+        - Inserts the DataFrame's data into the table, appending to any existing data.
+
+    Data Type Mapping:
+        - Converts Pandas data types to SQLite types: 'int64' to 'INTEGER',
+          'float64' to 'REAL', 'object' to 'TEXT', 'datetime64[ns]' to 'TEXT',
+          and 'bool' to 'INTEGER'.
+
+    Returns:
+        None
+    """
+
+    def table_exists(cursor, table_name):
+        cursor.execute(
+            f"SELECT count(name) FROM sqlite_master WHERE type='table' AND name='{table_name}'")
+        return cursor.fetchone()[0] == 1
+
+    dtype_mapping = {
+        'int64': 'INTEGER',
+        'float64': 'REAL',
+        'object': 'TEXT',
+        'datetime64[ns]': 'TEXT',
+        'bool': 'INTEGER',
+    }
+
+    def map_dtype(dtype):
+        return dtype_mapping.get(str(dtype), 'TEXT')
+
+    with sqlite3.connect(db_path) as conn:
+        cursor = conn.cursor()
+
+        if not table_exists(cursor, tablename):
+            columns_with_types = ', '.join(
+                f'"{col}" {
+                    map_dtype(dtype)}' for col,
+                dtype in zip(
+                    df.columns,
+                    df.dtypes))
+            create_table_query = f'CREATE TABLE "{tablename}" ({columns_with_types})'
+            conn.execute(create_table_query)
+
+        df.to_sql(tablename, conn, if_exists='append', index=False)
+
+
+def sync_dataframe_to_sqlite_database(
+        db_path: str,
+        tablename: str,
+        df: pd.DataFrame) -> None:
     """
     Processes and saves a DataFrame to an SQLite database, adding a timestamp column
     and replacing the existing table if needed. Creates the table if it does not exist.
@@ -1702,6 +1955,10 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
     - tablename (str): The name of the table in the database.
     - df (pd.DataFrame): The DataFrame to be processed and saved.
     """
+    # Helper function to map pandas dtype to SQLite type
+    def map_dtype(dtype):
+        return dtype_mapping.get(str(dtype), 'TEXT')
+
     # Step 1: Add a timestamp column to the dataframe
     df['rgwfuncs_sync_timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

@@ -1714,10 +1971,6 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
         'bool': 'INTEGER',  # SQLite does not have a separate Boolean storage class
     }

-    # Helper function to map pandas dtype to SQLite type
-    def map_dtype(dtype):
-        return dtype_mapping.get(str(dtype), 'TEXT')
-
     # Step 2: Save df in SQLite3 db as '{tablename}_new'
     with sqlite3.connect(db_path) as conn:
         new_table_name = f"{tablename}_new"
@@ -1728,8 +1981,11 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
         if cursor.fetchall() == []:  # Table does not exist
             # Create a table using the DataFrame's column names and types
             columns_with_types = ', '.join(
-                f'"{col}" {
-
+                f'"{col}" {
+                    map_dtype(dtype)}' for col,
+                dtype in zip(
+                    df.columns,
+                    df.dtypes))
             create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
             conn.execute(create_table_query)

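The main functional addition to df_lib.py is `insert_dataframe_in_sqlite_database`, which appends rows to a table (creating it from the DataFrame's dtypes when missing), in contrast to the existing `sync_dataframe_to_sqlite_database`, which adds a `rgwfuncs_sync_timestamp` column and replaces the table's contents. A brief sketch of the difference; the database path and table names are illustrative:

    import pandas as pd
    from rgwfuncs import insert_dataframe_in_sqlite_database, sync_dataframe_to_sqlite_database

    df = pd.DataFrame({'ID': [1, 2], 'Name': ['Alice', 'Bob']})

    # Appends to (or creates) the table, mapping dtypes to SQLite column types
    insert_dataframe_in_sqlite_database('my_database.db', 'students', df)

    # Adds a rgwfuncs_sync_timestamp column and replaces the existing table
    sync_dataframe_to_sqlite_database('my_database.db', 'students_snapshot', df)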
rgwfuncs/str_lib.py
ADDED
@@ -0,0 +1,62 @@
+import os
+import json
+import requests
+from typing import Tuple
+
+def send_telegram_message(preset_name: str, message: str) -> None:
+    """Send a Telegram message using the specified preset.
+
+    Args:
+        preset_name (str): The name of the preset to use for sending the message.
+        message (str): The message to send.
+
+    Raises:
+        RuntimeError: If the preset is not found or necessary details are missing.
+    """
+
+    # Set the config path to ~/.rgwfuncsrc
+    config_path = os.path.expanduser("~/.rgwfuncsrc")
+
+    def load_config() -> dict:
+        """Load the configuration from the .rgwfuncsrc file."""
+        with open(config_path, 'r') as file:
+            return json.load(file)
+
+    def get_telegram_preset(config: dict, preset_name: str) -> dict:
+        """Get the Telegram preset configuration."""
+        presets = config.get("telegram_bot_presets", [])
+        for preset in presets:
+            if preset.get("name") == preset_name:
+                return preset
+        return None
+
+    def get_telegram_bot_details(config: dict, preset_name: str) -> Tuple[str, str]:
+        """Retrieve the Telegram bot token and chat ID from the preset."""
+        preset = get_telegram_preset(config, preset_name)
+        if not preset:
+            raise RuntimeError(f"Telegram bot preset '{preset_name}' not found in the configuration file")
+
+        bot_token = preset.get("bot_token")
+        chat_id = preset.get("chat_id")
+
+        if not bot_token or not chat_id:
+            raise RuntimeError(
+                f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file"
+            )
+
+        return bot_token, chat_id
+
+    # Load the configuration
+    config = load_config()
+
+    # Get bot details from the configuration
+    bot_token, chat_id = get_telegram_bot_details(config, preset_name)
+
+    # Prepare the request
+    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
+    payload = {"chat_id": chat_id, "text": message}
+
+    # Send the message
+    response = requests.post(url, json=payload)
+    response.raise_for_status()
+
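A minimal usage sketch for the new str_lib.py entry point, assuming `~/.rgwfuncsrc` contains a `telegram_bot_presets` entry with `name`, `bot_token`, and `chat_id`; the preset name and message below are illustrative:

    from rgwfuncs import send_telegram_message

    # Sends the text via the bot defined in the 'my_bot' preset of ~/.rgwfuncsrc
    send_telegram_message('my_bot', 'Build complete')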
{rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.16
+Version: 0.0.18
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson
@@ -40,9 +40,9 @@ Install the package using:

 --------------------------------------------------------------------------------

-## Create a `
+## Create a `.rgwfuncsrc` File

-A `
+A `.rgwfuncsrc` file (located at `vi ~/.rgwfuncsrc) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.

     {
       "db_presets" : [
@@ -381,28 +381,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
 --------------------------------------------------------------------------------

 ### 12. `load_data_from_query`
+
 Load data from a database query into a DataFrame based on a configuration preset.

-
-- `db_preset_name` (str): Name of the database preset in the
-- query (str): The SQL query to execute.
-- `config_file_name` (str): Name of the configuration file (default: "rgwml.config").
+- **Parameters:**
+  - `db_preset_name` (str): Name of the database preset in the configuration file.
+  - `query` (str): The SQL query to execute.

-
-- pd.DataFrame
+- **Returns:**
+  - `pd.DataFrame`: A DataFrame containing the query result.

-
-
-    from rgwfuncs import load_data_from_query
+- **Notes:**
+  - The configuration file is assumed to be located at `~/.rgwfuncsrc`.

-
-
-
-
-
-
-
+- **Example:**
+
+    from rgwfuncs import load_data_from_query
+
+    df = load_data_from_query(
+        db_preset_name="MyDBPreset",
+        query="SELECT * FROM my_table"
+    )
+    print(df)

+
 --------------------------------------------------------------------------------

 ### 13. `load_data_from_path`
@@ -1148,10 +1150,47 @@ Perform a right join on two DataFrames.
     df_right_join = right_join(df1, df2, 'ID', 'ID')
     print(df_right_join)

+--------------------------------------------------------------------------------
+
+### 45. `insert_dataframe_in_sqlite_database`
+
+Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
+
+- **Parameters:**
+  - `db_path` (str): The path to the SQLite database file. If the database does not exist, it will be created.
+  - `tablename` (str): The name of the table in the database. If the table does not exist, it is created with the DataFrame's columns and data types.
+  - `df` (pd.DataFrame): The DataFrame containing the data to be inserted into the database table.
+
+- **Returns:**
+  - `None`
+
+- **Notes:**
+  - Data types in the DataFrame are converted to SQLite-compatible types:
+    - `int64` is mapped to `INTEGER`
+    - `float64` is mapped to `REAL`
+    - `object` is mapped to `TEXT`
+    - `datetime64[ns]` is mapped to `TEXT` (dates are stored as text)
+    - `bool` is mapped to `INTEGER` (SQLite does not have a separate Boolean type)
+
+- **Example:**
+
+    from rgwfuncs import insert_dataframe_in_sqlite_database
+    import pandas as pd
+
+    df = pd.DataFrame({
+        'ID': [1, 2, 3],
+        'Name': ['Alice', 'Bob', 'Charlie'],
+        'Score': [88.5, 92.3, 85.0]
+    })
+
+    db_path = 'my_database.db'
+    tablename = 'students'
+
+    insert_dataframe_in_sqlite_database(db_path, tablename, df)

 --------------------------------------------------------------------------------

-###
+### 46. `sync_dataframe_to_sqlite_database`
 Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.

 • Parameters:
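For comparison with section 45 above, a call to `sync_dataframe_to_sqlite_database` based on the signature shown in df_lib.py (`db_path`, `tablename`, `df`); the file and table names are illustrative:

    import pandas as pd
    from rgwfuncs import sync_dataframe_to_sqlite_database

    df = pd.DataFrame({'ID': [1, 2, 3], 'Score': [88.5, 92.3, 85.0]})

    # Replaces (or creates) the table and appends a rgwfuncs_sync_timestamp column
    sync_dataframe_to_sqlite_database('my_database.db', 'scores', df)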
rgwfuncs-0.0.18.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+rgwfuncs/__init__.py,sha256=XqJ8TJuc4HkQq3T5Gzjf3KTBsdJtyi2NKXBgbPuDn0Y,1156
+rgwfuncs/df_lib.py,sha256=rY1yVvY04uqR174JwYBFiRnujekr9mbe258wmu9OeeY,67148
+rgwfuncs/str_lib.py,sha256=6v9AXZ5wWsWVEcvcIz0B1rTmsvYaD-v53r2sYPcV4pU,2109
+rgwfuncs-0.0.18.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
+rgwfuncs-0.0.18.dist-info/METADATA,sha256=GfMK-J1vH4CG_fQqQAWwAvDE6JcSqNrKuNKvfOUKV_E,33442
+rgwfuncs-0.0.18.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+rgwfuncs-0.0.18.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
+rgwfuncs-0.0.18.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
+rgwfuncs-0.0.18.dist-info/RECORD,,
rgwfuncs-0.0.16.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
-rgwfuncs/df_lib.py,sha256=OZPI7M35mbue6YsieWmlzjM5RUkaow0v0d3P-V71L6o,63034
-rgwfuncs-0.0.16.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
-rgwfuncs-0.0.16.dist-info/METADATA,sha256=oKTScVPzrgTTWdCQ7vxEdKYRnc-S_90hKwefifayeDU,32059
-rgwfuncs-0.0.16.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-rgwfuncs-0.0.16.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
-rgwfuncs-0.0.16.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
-rgwfuncs-0.0.16.dist-info/RECORD,,
{rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/LICENSE
File without changes
{rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/WHEEL
File without changes
{rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/entry_points.txt
File without changes
{rgwfuncs-0.0.16.dist-info → rgwfuncs-0.0.18.dist-info}/top_level.txt
File without changes