rgwfuncs 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/__init__.py CHANGED
@@ -1,4 +1,5 @@
  # This file is automatically generated
  # Dynamically importing functions from modules

- from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
+ from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
+ from .str_lib import send_telegram_message
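The net change to the public API: `insert_dataframe_in_sqlite_database` is now exported from `df_lib`, and a new `str_lib` module contributes `send_telegram_message`. A minimal import check (the DataFrame contents and preset name here are hypothetical, not part of the diff):

    import pandas as pd
    from rgwfuncs import insert_dataframe_in_sqlite_database, send_telegram_message

    df = pd.DataFrame({'ID': [1, 2], 'Name': ['Alice', 'Bob']})
    insert_dataframe_in_sqlite_database('demo.db', 'people', df)  # appends, creating the table if absent
    send_telegram_message('my_tg_bot', 'demo.db updated')  # needs a preset in ~/.rgwfuncsrc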
rgwfuncs/df_lib.py CHANGED
@@ -28,6 +28,7 @@ import warnings
  # Suppress all FutureWarnings
  warnings.filterwarnings("ignore", category=FutureWarning)

+
  def docs(method_type_filter: Optional[str] = None) -> None:
  """
  Print a list of function names in alphabetical order. If method_type_filter
@@ -65,7 +66,11 @@ def docs(method_type_filter: Optional[str] = None) -> None:
  print(f"\n{name}:\n{docstring}")


- def numeric_clean(df: pd.DataFrame, column_names: str, column_type: str, irregular_value_treatment: str) -> pd.DataFrame:
+ def numeric_clean(
+ df: pd.DataFrame,
+ column_names: str,
+ column_type: str,
+ irregular_value_treatment: str) -> pd.DataFrame:
  """
  Cleans the numeric columns based on specified treatments.

@@ -296,7 +301,9 @@ def drop_duplicates(df: pd.DataFrame) -> pd.DataFrame:
  return df.drop_duplicates(keep='first')


- def drop_duplicates_retain_first(df: pd.DataFrame, columns: Optional[str] = None) -> pd.DataFrame:
+ def drop_duplicates_retain_first(
+ df: pd.DataFrame,
+ columns: Optional[str] = None) -> pd.DataFrame:
  """
  Drop duplicate rows in the DataFrame based on specified columns, retaining the first occurrence.

@@ -318,7 +325,9 @@ def drop_duplicates_retain_first(df: pd.DataFrame, columns: Optional[str] = None
  return df.drop_duplicates(subset=columns_list, keep='first')


- def drop_duplicates_retain_last(df: pd.DataFrame, columns: Optional[str] = None) -> pd.DataFrame:
+ def drop_duplicates_retain_last(
+ df: pd.DataFrame,
+ columns: Optional[str] = None) -> pd.DataFrame:
  """
  Drop duplicate rows in the DataFrame based on specified columns, retaining the last occurrence.

@@ -335,20 +344,18 @@ def drop_duplicates_retain_last(df: pd.DataFrame, columns: Optional[str] = None)
  if df is None:
  raise ValueError("DataFrame is not initialized.")

- columns_list = [col.strip() for col in columns.split(',')] if columns else None
+ columns_list = [col.strip()
+ for col in columns.split(',')] if columns else None
  return df.drop_duplicates(subset=columns_list, keep='last')


- def load_data_from_query(db_preset_name: str, query: str, config_file_name: str = "rgwml.config") -> pd.DataFrame:
+ def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
  """
- Load data from a database query into a DataFrame based on a configuration
- preset.
+ Load data from a database query into a DataFrame based on a configuration preset.

  Parameters:
  db_preset_name: The name of the database preset in the configuration file.
  query: The SQL query to execute.
- config_file_name: Name of the configuration file
- (default: 'rgwml.config').

  Returns:
  A DataFrame containing the query result.
@@ -358,17 +365,6 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
  ValueError: If the database preset or db_type is invalid.
  """

- def locate_config_file(filename: str = config_file_name) -> str:
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, "Desktop"), os.path.join(home_dir, "Documents"), os.path.join(home_dir, "Downloads"),]
-
- for path in search_paths:
- for root, dirs, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(
- f"{filename} not found in Desktop, Documents, or Downloads folders")
-
  def query_mssql(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
  server = db_preset['host']
  user = db_preset['username']
@@ -393,12 +389,13 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
  with conn.cursor() as cursor:
  cursor.execute(query)
  rows = cursor.fetchall()
- columns = ([desc[0] for desc in cursor.description] if cursor.description else [])
+ columns = ([desc[0] for desc in cursor.description]
+ if cursor.description else [])

  return pd.DataFrame(rows, columns=columns)

- def query_clickhouse(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
-
+ def query_clickhouse(
+ db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
  host = db_preset['host']
  user = db_preset['username']
  password = db_preset['password']
@@ -409,7 +406,8 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str

  for attempt in range(max_retries):
  try:
- client = clickhouse_connect.get_client(host=host, port='8123', username=user, password=password, database=database)
+ client = clickhouse_connect.get_client(
+ host=host, port='8123', username=user, password=password, database=database)
  data = client.query(query)
  rows = data.result_rows
  columns = data.column_names
@@ -423,11 +421,13 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
  raise ConnectionError(
  "All attempts to connect to ClickHouse failed.")

- def query_google_big_query(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
+ def query_google_big_query(
+ db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
  json_file_path = db_preset['json_file_path']
  project_id = db_preset['project_id']

- credentials = service_account.Credentials.from_service_account_file(json_file_path)
+ credentials = service_account.Credentials.from_service_account_file(
+ json_file_path)
  client = bigquery.Client(credentials=credentials, project=project_id)

  query_job = client.query(query)
@@ -437,13 +437,15 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str

  return pd.DataFrame(rows, columns=columns)

- # Read the configuration file to get the database preset
- config_path = locate_config_file()
+ # Assume the configuration file is located at ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  with open(config_path, 'r') as f:
  config = json.load(f)

  db_presets = config.get('db_presets', [])
- db_preset = next((preset for preset in db_presets if preset['name'] == db_preset_name), None)
+ db_preset = next(
+ (preset for preset in db_presets if preset['name'] == db_preset_name),
+ None)
  if not db_preset:
  raise ValueError(f"No matching db_preset found for {db_preset_name}")

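Because the config lookup no longer walks Desktop, Documents, and Downloads for `rgwml.config`, callers must have `~/.rgwfuncsrc` in place before any query runs. A minimal sketch of setting one up and using it; the preset name, the `db_type` value, and the credentials are placeholder assumptions, only the key names mirror what the code above reads:

    import json
    import os

    # Hypothetical single-preset config; keys mirror those read above
    # (db_presets, name, host, username, password, database).
    config = {"db_presets": [{"name": "MyDBPreset", "db_type": "mssql",
                              "host": "db.example.com", "username": "user",
                              "password": "secret", "database": "mydb"}]}
    with open(os.path.expanduser("~/.rgwfuncsrc"), "w") as f:
        json.dump(config, f)

    from rgwfuncs import load_data_from_query
    df = load_data_from_query(db_preset_name="MyDBPreset",
                              query="SELECT * FROM my_table")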
@@ -621,10 +623,20 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
  for column in columns:
  if column in df.columns:
  frequency = df[column].astype(str).value_counts(dropna=False)
- frequency = frequency.rename(index={'nan': 'NaN', 'NaT': 'NaT', 'None': 'None', '': 'Empty'})
+ frequency = frequency.rename(
+ index={
+ 'nan': 'NaN',
+ 'NaT': 'NaT',
+ 'None': 'None',
+ '': 'Empty'})
  top_n_values = frequency.nlargest(n)
- report[column] = {str(value): str(count) for value, count in top_n_values.items()}
- print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
+ report[column] = {str(value): str(count)
+ for value, count in top_n_values.items()}
+ print(
+ f"Top {n} unique values for column '{column}':\n{
+ json.dumps(
+ report[column],
+ indent=2)}\n")
  else:
  print(f"Column '{column}' does not exist in the DataFrame.")
  else:
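Note that the reflowed print call above splits a single-quoted f-string across physical lines, which appears to parse only on Python 3.12+ (PEP 701); on older interpreters importing this module would raise a SyntaxError. A minimal usage sketch for the function itself (the DataFrame is hypothetical):

    import pandas as pd
    from rgwfuncs import top_n_unique_values

    df = pd.DataFrame({'city': ['Pune', 'Pune', 'Agra', None]})
    top_n_unique_values(df, 2, ['city'])  # prints a JSON frequency report; missing values appear as 'NaN'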
@@ -634,7 +646,10 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
  gc.collect()


- def bottom_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
+ def bottom_n_unique_values(
+ df: pd.DataFrame,
+ n: int,
+ columns: List[str]) -> None:
  """
  Print the bottom `n` unique values for specified columns in the DataFrame.

@@ -654,12 +669,21 @@ def bottom_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None
  for column in columns:
  if column in df.columns:
  frequency = df[column].astype(str).value_counts(dropna=False)
- frequency = frequency.rename(index={'nan': 'NaN', 'NaT': 'NaT', 'None': 'None', '': 'Empty'})
+ frequency = frequency.rename(
+ index={
+ 'nan': 'NaN',
+ 'NaT': 'NaT',
+ 'None': 'None',
+ '': 'Empty'})
  bottom_n_values = frequency.nsmallest(n)
  report[column] = {
  str(value): str(count) for value,
  count in bottom_n_values.items()}
- print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
+ print(
+ f"Bottom {n} unique values for column '{column}':\n{
+ json.dumps(
+ report[column],
+ indent=2)}\n")
  else:
  print(f"Column '{column}' does not exist in the DataFrame.")
  else:
@@ -669,7 +693,8 @@ def bottom_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None
  gc.collect()


- def print_correlation(df: pd.DataFrame, column_pairs: List[Tuple[str, str]]) -> None:
+ def print_correlation(
+ df: pd.DataFrame, column_pairs: List[Tuple[str, str]]) -> None:
  """
  Print correlation for multiple pairs of columns in the DataFrame.

@@ -688,13 +713,16 @@ def print_correlation(df: pd.DataFrame, column_pairs: List[Tuple[str, str]]) ->

  correlation = numeric_col1.corr(numeric_col2)
  if pd.notnull(correlation):
- print(f"The correlation between '{col1}' and '{col2}' is {correlation}.")
+ print(
+ f"The correlation between '{col1}' and '{col2}' is {correlation}.")
  else:
- print(f"Cannot calculate correlation between '{col1}' and '{col2}' due to insufficient numeric data.")
+ print(
+ f"Cannot calculate correlation between '{col1}' and '{col2}' due to insufficient numeric data.")
  except Exception as e:
  print(f"Error processing cols '{col1}' and '{col2}': {e}")
  else:
- print(f"One or both of the specified cols ('{col1}', '{col2}') do not exist in the DataFrame.")
+ print(
+ f"One or both of the specified cols ('{col1}', '{col2}') do not exist in the DataFrame.")
  else:
  print("The DataFrame is empty.")

@@ -714,7 +742,8 @@ def print_memory_usage(df: pd.DataFrame) -> None:
  - ValueError: If the DataFrame is `None`.
  """
  if df is not None:
- memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024)  # Convert bytes to MB
+ memory_usage = df.memory_usage(deep=True).sum(
+ ) / (1024 * 1024)  # Convert bytes to MB
  print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
  else:
  raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -795,7 +824,8 @@ def print_dataframe(df: pd.DataFrame, source: Optional[str] = None) -> None:
  """
  if df is not None:
  print(df)
- columns_with_types = [f"{col} ({df[col].dtypes})" for col in df.columns]
+ columns_with_types = [
+ f"{col} ({df[col].dtypes})" for col in df.columns]
  print("Columns:", columns_with_types)
  if source:
  print(f"Source: {source}")
@@ -811,48 +841,53 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option

  Parameters:
  df: The DataFrame to send.
- bot_name: The name of the Telegram bot as specified in the configuration.
- message: Custom message to send along with the DataFrame or file.
- as_file: Boolean flag to decide whether to send the DataFrame as a file or as text.
- remove_after_send: If True, removes the file after sending.
- """
+ bot_name: The name of the Telegram bot as specified in the configuration file.
+ message: Custom message to send along with the DataFrame or file. Defaults to None.
+ as_file: Boolean flag to indicate whether the DataFrame should be sent as a file (True) or as text (False). Defaults to True.
+ remove_after_send: If True, removes the CSV file after sending. Defaults to True.

- def locate_config_file(filename: str = "rgwml.config") -> str:
- """Retrieve the configuration file path."""
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+ Raises:
+ ValueError: If the specified bot is not found or if no DataFrame is provided.
+ Exception: If the message sending fails.

- for path in search_paths:
- for root, _, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(
- f"{filename} not found in Desktop, Documents, or Downloads")
+ Notes:
+ The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+ """

  def get_config(config_path: str) -> dict:
- """Load configuration from a json file."""
+ """Load configuration from a JSON file."""
  with open(config_path, 'r') as file:
  return json.load(file)

- config_path = locate_config_file()
+ # Assume the configuration file is located at ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  config = get_config(config_path)
- bot_config = next((bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name), None)

+ bot_config = next(
+ (bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name),
+ None)
  if not bot_config:
  raise ValueError(f"No bot found with the name {bot_name}")

  if df is None:
  raise ValueError("No DataFrame to send. Please provide a DataFrame.")

+ response = None
  if as_file:
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
  file_name = f"df_{timestamp}.csv"
  df.to_csv(file_name, index=False)
  try:
  with open(file_name, 'rb') as file:
- payload = {'chat_id': bot_config['chat_id'], 'caption': message or ''}
+ payload = {
+ 'chat_id': bot_config['chat_id'],
+ 'caption': message or ''}
  files = {'document': file}
- response = requests.post(f"https://api.telegram.org/bot{bot_config['bot_token']}/sendDocument", data=payload, files=files)
+ response = requests.post(
+ f"https://api.telegram.org/bot{
+ bot_config['bot_token']}/sendDocument",
+ data=payload,
+ files=files)
  if remove_after_send and os.path.exists(file_name):
  os.remove(file_name)
  except Exception as e:
@@ -862,40 +897,45 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option
  df_str = df.to_string()
  payload = {
  'chat_id': bot_config['chat_id'],
- 'text': message + "\n\n" + df_str if message else df_str,
- 'parse_mode': 'HTML'}
- response = requests.post(f"https://api.telegram.org/bot{bot_config['bot_token']}/sendMessage", data=payload)
+ 'text': (message + "\n\n" + df_str) if message else df_str,
+ 'parse_mode': 'HTML'
+ }
+ response = requests.post(
+ f"https://api.telegram.org/bot{bot_config['bot_token']}/sendMessage", data=payload)

- if not response.ok:
+ if response and not response.ok:
  raise Exception(f"Error sending message: {response.text}")

  print("Message sent successfully.")


- def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subject: Optional[str] = None, body: Optional[str] = None, as_file: bool = True, remove_after_send: bool = True) -> None:
+ def send_data_to_email(
+ df: pd.DataFrame,
+ preset_name: str,
+ to_email: str,
+ subject: Optional[str] = None,
+ body: Optional[str] = None,
+ as_file: bool = True,
+ remove_after_send: bool = True) -> None:
  """
- Send an email with optional DataFrame attachment using Gmail API via a specified preset.
+ Send an email with an optional DataFrame attachment using the Gmail API via a specified preset.

  Parameters:
  df: The DataFrame to send.
  preset_name: The configuration preset name to use for sending the email.
  to_email: The recipient email address.
- subject: Optional subject of the email.
- body: Optional message body of the email.
- as_file: Boolean flag to decide whether to send the DataFrame as a file.
- remove_after_send: If True, removes the CSV file after sending.
- """
+ subject: Optional subject of the email. Defaults to 'DataFrame CSV File' if not given.
+ body: Optional message body of the email. Defaults to 'Please find the CSV file attached.' if not given.
+ as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or embed it in the email (False). Defaults to True.
+ remove_after_send: If True, removes the CSV file after sending. Defaults to True.

- def locate_config_file(filename: str = "rgwml.config") -> str:
- """Locate config file in common user directories."""
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+ Raises:
+ ValueError: If the preset is not found in the configuration.
+ Exception: If the email preparation or sending fails.

- for path in search_paths:
- for root, _, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(f"{filename} not found in Desktop, Documents, or Downloads folders")
+ Notes:
+ The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+ """

  def get_config(config_path: str) -> dict:
  with open(config_path, 'r') as file:
@@ -914,12 +954,14 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
  )
  return build('gmail', 'v1', credentials=credentials)

- # Load configuration
- config_path = locate_config_file()
+ # Load configuration from ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  config = get_config(config_path)

  # Retrieve Gmail preset configuration
- gmail_config = next((preset for preset in config['gmail_bot_presets'] if preset['name'] == preset_name), None)
+ gmail_config = next(
+ (preset for preset in config['gmail_bot_presets'] if preset['name'] == preset_name),
+ None)

  if not gmail_config:
  raise ValueError(f"No preset found with the name {preset_name}")
@@ -942,13 +984,18 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
  message['to'] = to_email
  message['from'] = sender_email
  message['subject'] = subject if subject else 'DataFrame CSV File'
- message.attach(MIMEText(body if body else 'Please find the CSV file attached.'))
+ message.attach(
+ MIMEText(
+ body if body else 'Please find the CSV file attached.'))

  with open(tmp_file_name, 'rb') as file:
  part = MIMEBase('application', 'octet-stream')
  part.set_payload(file.read())
  encoders.encode_base64(part)
- part.add_header('Content-Disposition', f'attachment; filename={os.path.basename(tmp_file_name)}')
+ part.add_header(
+ 'Content-Disposition',
+ f'attachment; filename={
+ os.path.basename(tmp_file_name)}')
  message.attach(part)

  if remove_after_send and os.path.exists(tmp_file_name):
@@ -970,46 +1017,49 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
  try:
  raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
  email_body = {'raw': raw}
- sent_message = service.users().messages().send(userId="me", body=email_body).execute()
+ sent_message = service.users().messages().send(
+ userId="me", body=email_body).execute()
  print(f"Email with Message Id {sent_message['id']} successfully sent.")
  except Exception as error:
  raise Exception(f"Error sending email: {error}")


- def send_data_to_slack(df: pd.DataFrame, bot_name: str, message: Optional[str] = None, as_file: bool = True, remove_after_send: bool = True) -> None:
+ def send_data_to_slack(
+ df: pd.DataFrame,
+ bot_name: str,
+ message: Optional[str] = None,
+ as_file: bool = True,
+ remove_after_send: bool = True) -> None:
  """
  Send a DataFrame or message to Slack using a specified bot configuration.

  Parameters:
  df: The DataFrame to send.
  bot_name: The Slack bot configuration preset name.
- message: Custom message to send along with the DataFrame or file.
- as_file: Boolean flag to decide whether to send the DataFrame as a file.
- remove_after_send: If True, removes the CSV file after sending.
- """
+ message: Custom message to send along with the DataFrame or file. Defaults to None.
+ as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or as text (False). Defaults to True.
+ remove_after_send: If True, removes the CSV file after sending. Defaults to True.

- def locate_config_file(filename: str = "rgwml.config") -> str:
- """Locate config file in common user directories."""
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+ Raises:
+ ValueError: If the specified bot is not found in the configuration.
+ Exception: If the message sending fails.

- for path in search_paths:
- for root, _, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(
- f"{filename} not found in Desktop, Documents, or Downloads folders")
+ Notes:
+ The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+ """

  def get_config(config_path: str) -> dict:
  """Load configuration from a JSON file."""
  with open(config_path, 'r') as file:
  return json.load(file)

- # Load the Slack configuration
- config_path = locate_config_file()
+ # Load the Slack configuration from ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  config = get_config(config_path)

- bot_config = next((bot for bot in config['slack_bot_presets'] if bot['name'] == bot_name), None)
+ bot_config = next(
+ (bot for bot in config['slack_bot_presets'] if bot['name'] == bot_name),
+ None)

  if not bot_config:
  raise ValueError(f"No bot found with the name {bot_name}")
@@ -1024,13 +1074,22 @@ def send_data_to_slack(df: pd.DataFrame, bot_name: str, message: Optional[str] =

  try:
  with open(file_name, 'rb') as file:
- response = client.files_upload(channels=bot_config['channel_id'], file=file, filename=os.path.basename(file_name), title="DataFrame Upload", initial_comment=message or '')
+ response = client.files_upload(
+ channels=bot_config['channel_id'],
+ file=file,
+ filename=os.path.basename(file_name),
+ title="DataFrame Upload",
+ initial_comment=message or ''
+ )
  finally:
  if remove_after_send and os.path.exists(file_name):
  os.remove(file_name)
  else:
  df_str = df.to_string()
- response = client.chat_postMessage(channel=bot_config['channel_id'], text=(message + "\n\n" + df_str) if message else df_str)
+ response = client.chat_postMessage(
+ channel=bot_config['channel_id'],
+ text=(message + "\n\n" + df_str) if message else df_str
+ )

  # Check if the message was sent successfully
  if not response["ok"]:
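A minimal usage sketch for the messaging helpers above, assuming `~/.rgwfuncsrc` contains `telegram_bot_presets` and `slack_bot_presets` entries with the hypothetical names used here:

    import pandas as pd
    from rgwfuncs import send_dataframe_via_telegram, send_data_to_slack

    df = pd.DataFrame({'metric': ['rows'], 'value': [42]})
    send_dataframe_via_telegram(df, 'my_tg_bot', message='Daily report', as_file=True)
    send_data_to_slack(df, 'my_slack_bot', message='Daily report', as_file=False)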
@@ -1087,7 +1146,11 @@ def order_columns(df: pd.DataFrame, column_order_str: str) -> pd.DataFrame:
  return df[new_order]


- def append_ranged_classification_column(df: pd.DataFrame, ranges: str, target_col: str, new_col_name: str) -> pd.DataFrame:
+ def append_ranged_classification_column(
+ df: pd.DataFrame,
+ ranges: str,
+ target_col: str,
+ new_col_name: str) -> pd.DataFrame:
  """
  Append a ranged classification column to the DataFrame.

@@ -1155,16 +1218,27 @@ def append_ranged_classification_column(df: pd.DataFrame, ranges: str, target_co
  for r in range_list
  )

- labels = [f"{pad_number(range_list[i], max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
+ labels = [f"{pad_number(range_list[i],
+ max_integer_length)} to {pad_number(range_list[i + 1],
+ max_integer_length)}" for i in range(len(range_list) - 1)]

  # Ensure the target column is numeric
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
- df[new_col_name] = pd.cut(df[target_col], bins=range_list, labels=labels, right=False, include_lowest=True)
+ df[new_col_name] = pd.cut(
+ df[target_col],
+ bins=range_list,
+ labels=labels,
+ right=False,
+ include_lowest=True)

  return df


- def append_percentile_classification_column(df: pd.DataFrame, percentiles: str, target_col: str, new_col_name: str) -> pd.DataFrame:
+ def append_percentile_classification_column(
+ df: pd.DataFrame,
+ percentiles: str,
+ target_col: str,
+ new_col_name: str) -> pd.DataFrame:
  """
  Append a percentile classification column to the DataFrame.

@@ -1192,14 +1266,21 @@ def append_percentile_classification_column(df: pd.DataFrame, percentiles: str,

  if has_decimals:
  percentiles_list = [float(p) for p in percentiles_list]
- max_decimal_length = max(len(str(p).split('.')[1]) for p in percentiles_list if '.' in str(p))
- max_integer_length = max(len(str(int(float(p)))) for p in percentiles_list)
+ max_decimal_length = max(
+ len(str(p).split('.')[1]) for p in percentiles_list if '.' in str(p))
+ max_integer_length = max(len(str(int(float(p))))
+ for p in percentiles_list)

  labels = []

  for i in range(len(percentiles_list) - 1):
- start = pad_number(percentiles_list[i], max_integer_length, max_decimal_length, decimal=True)
- end = pad_number(percentiles_list[i + 1], max_integer_length, max_decimal_length, decimal=True)
+ start = pad_number(
+ percentiles_list[i],
+ max_integer_length,
+ max_decimal_length,
+ decimal=True)
+ end = pad_number(
+ percentiles_list[i + 1], max_integer_length, max_decimal_length, decimal=True)

  label = f"{start} to {end}"
  labels.append(label)
@@ -1222,12 +1303,20 @@ def append_percentile_classification_column(df: pd.DataFrame, percentiles: str,
  df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
  quantiles = [df[target_col].quantile(p / 100) for p in percentiles_list]

- df[new_col_name] = pd.cut(df[target_col], bins=quantiles, labels=labels, include_lowest=True)
+ df[new_col_name] = pd.cut(
+ df[target_col],
+ bins=quantiles,
+ labels=labels,
+ include_lowest=True)

  return df


- def append_ranged_date_classification_column(df: pd.DataFrame, date_ranges: str, target_col: str, new_col_name: str) -> pd.DataFrame:
+ def append_ranged_date_classification_column(
+ df: pd.DataFrame,
+ date_ranges: str,
+ target_col: str,
+ new_col_name: str) -> pd.DataFrame:
  """
  Append a ranged date classification column to the DataFrame.

@@ -1260,7 +1349,9 @@ def append_ranged_date_classification_column(df: pd.DataFrame, date_ranges: str,
  return df


- def rename_columns(df: pd.DataFrame, rename_pairs: Dict[str, str]) -> pd.DataFrame:
+ def rename_columns(df: pd.DataFrame,
+ rename_pairs: Dict[str,
+ str]) -> pd.DataFrame:
  """
  Rename columns in the DataFrame.

@@ -1272,7 +1363,8 @@ def rename_columns(df: pd.DataFrame, rename_pairs: Dict[str, str]) -> pd.DataFra
  A new DataFrame with columns renamed.
  """
  if df is None:
- raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
+ raise ValueError(
+ "No DataFrame to rename columns. Please provide a valid DataFrame.")

  return df.rename(columns=rename_pairs)

@@ -1290,7 +1382,8 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
  A new DataFrame sorted by specified columns.
  """
  if df is None:
- raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
+ raise ValueError(
+ "No DataFrame to sort. Please provide a valid DataFrame.")

  col_names = []
  asc_order = []
@@ -1325,7 +1418,8 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
  A new DataFrame with XGB_TYPE labels appended.
  """
  if df is None:
- raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
+ raise ValueError(
+ "No DataFrame to add labels. Please provide a valid DataFrame.")

  ratios = list(map(int, ratio_str.split(':')))
  total_ratio = sum(ratios)
@@ -1342,7 +1436,8 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
  labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
  validate_rows + ['TEST'] * test_rows
  else:
- raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
+ raise ValueError(
+ "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")

  df_with_labels = df.copy()
  df_with_labels['XGB_TYPE'] = labels
@@ -1350,7 +1445,13 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
  return df_with_labels


- def append_xgb_regression_predictions(df: pd.DataFrame, target_col: str, feature_cols: str, pred_col: str, boosting_rounds: int = 100, model_path: Optional[str] = None) -> pd.DataFrame:
+ def append_xgb_regression_predictions(
+ df: pd.DataFrame,
+ target_col: str,
+ feature_cols: str,
+ pred_col: str,
+ boosting_rounds: int = 100,
+ model_path: Optional[str] = None) -> pd.DataFrame:
  """
  Append XGB regression predictions to DataFrame. Assumes data is labeled by an 'XGB_TYPE' column.

@@ -1366,7 +1467,8 @@ def append_xgb_regression_predictions(df: pd.DataFrame, target_col: str, feature
  DataFrame with predictions appended.
  """
  if df is None or 'XGB_TYPE' not in df.columns:
- raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+ raise ValueError(
+ "DataFrame is not initialized or 'XGB_TYPE' column is missing.")

  features = feature_cols.replace(' ', '').split(',')

@@ -1382,16 +1484,27 @@ def append_xgb_regression_predictions(df: pd.DataFrame, target_col: str, feature
  else:
  validate_data = None

- dtrain = xgb.DMatrix(train_data[features], label=train_data[target_col], enable_categorical=True)
+ dtrain = xgb.DMatrix(
+ train_data[features],
+ label=train_data[target_col],
+ enable_categorical=True)
  evals = [(dtrain, 'train')]

  if validate_data is not None:
- dvalidate = xgb.DMatrix(validate_data[features], label=validate_data[target_col], enable_categorical=True)
+ dvalidate = xgb.DMatrix(
+ validate_data[features],
+ label=validate_data[target_col],
+ enable_categorical=True)
  evals.append((dvalidate, 'validate'))

  params = {'objective': 'reg:squarederror', 'eval_metric': 'rmse'}

- model = xgb.train(params, dtrain, num_boost_round=boosting_rounds, evals=evals, early_stopping_rounds=10 if validate_data is not None else None)
+ model = xgb.train(
+ params,
+ dtrain,
+ num_boost_round=boosting_rounds,
+ evals=evals,
+ early_stopping_rounds=10 if validate_data is not None else None)

  # Make predictions for all data
  dall = xgb.DMatrix(df[features], enable_categorical=True)
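The XGBoost helpers compose: `append_xgb_labels` assigns TRAIN/VALIDATE/TEST labels by ratio, and the prediction helpers train on those subsets and append a prediction column. A minimal sketch on synthetic data (column names are hypothetical):

    import pandas as pd
    from rgwfuncs import append_xgb_labels, append_xgb_regression_predictions

    df = pd.DataFrame({'f1': range(100), 'f2': range(100, 200),
                       'target': [2 * i + 1 for i in range(100)]})
    df = append_xgb_labels(df, '7:2:1')  # adds the XGB_TYPE column
    df = append_xgb_regression_predictions(df, 'target', 'f1,f2', 'pred',
                                           boosting_rounds=50)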
@@ -1400,13 +1513,20 @@ def append_xgb_regression_predictions(df: pd.DataFrame, target_col: str, feature
  if model_path:
  model.save_model(model_path)

- columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
+ columns_order = [col for col in df.columns if col not in [
+ 'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
  df = df[columns_order]

  return df


- def append_xgb_logistic_regression_predictions(df: pd.DataFrame, target_col: str, feature_cols: str, pred_col: str, boosting_rounds: int = 100, model_path: Optional[str] = None) -> pd.DataFrame:
+ def append_xgb_logistic_regression_predictions(
+ df: pd.DataFrame,
+ target_col: str,
+ feature_cols: str,
+ pred_col: str,
+ boosting_rounds: int = 100,
+ model_path: Optional[str] = None) -> pd.DataFrame:
  """
  Append XGB logistic regression predictions to DataFrame. Assumes data is labeled by an 'XGB_TYPE' column.

@@ -1438,16 +1558,27 @@ def append_xgb_logistic_regression_predictions(df: pd.DataFrame, target_col: str
  if 'VALIDATE' in df['XGB_TYPE'].values:
  validate_data = df[df['XGB_TYPE'] == 'VALIDATE']

- dtrain = xgb.DMatrix(train_data[features], label=train_data[target_col], enable_categorical=True)
+ dtrain = xgb.DMatrix(
+ train_data[features],
+ label=train_data[target_col],
+ enable_categorical=True)
  evals = [(dtrain, 'train')]

  if validate_data is not None:
- dvalidate = xgb.DMatrix(validate_data[features], label=validate_data[target_col], enable_categorical=True)
+ dvalidate = xgb.DMatrix(
+ validate_data[features],
+ label=validate_data[target_col],
+ enable_categorical=True)
  evals.append((dvalidate, 'validate'))

  params = {'objective': 'binary:logistic', 'eval_metric': 'auc'}

- model = xgb.train(params, dtrain, num_boost_round=boosting_rounds, evals=evals, early_stopping_rounds=10 if validate_data is not None else None)
+ model = xgb.train(
+ params,
+ dtrain,
+ num_boost_round=boosting_rounds,
+ evals=evals,
+ early_stopping_rounds=10 if validate_data is not None else None)

  # Make predictions for all data
  dall = xgb.DMatrix(df[features], enable_categorical=True)
@@ -1456,13 +1587,18 @@ def append_xgb_logistic_regression_predictions(df: pd.DataFrame, target_col: str
  if model_path:
  model.save_model(model_path)

- columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
+ columns_order = [col for col in df.columns if col not in [
+ 'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
  df = df[columns_order]

  return df


- def print_n_frequency_cascading(df: pd.DataFrame, n: int, columns: str, order_by: str = "FREQ_DESC") -> None:
+ def print_n_frequency_cascading(
+ df: pd.DataFrame,
+ n: int,
+ columns: str,
+ order_by: str = "FREQ_DESC") -> None:
  """
  Print the cascading frequency of top n values for specified columns.

@@ -1485,7 +1621,12 @@ def print_n_frequency_cascading(df: pd.DataFrame, n: int, columns: str, order_by
  # Convert the column to string representation
  df[current_col] = df[current_col].astype(str)
  frequency = df[current_col].value_counts(dropna=False)
- frequency = frequency.rename(index={'nan': 'NaN', 'NaT': 'NaT', 'None': 'None', '': 'Empty'})
+ frequency = frequency.rename(
+ index={
+ 'nan': 'NaN',
+ 'NaT': 'NaT',
+ 'None': 'None',
+ '': 'Empty'})

  if limit is not None:
  frequency = frequency.nlargest(limit)
@@ -1500,8 +1641,11 @@ def print_n_frequency_cascading(df: pd.DataFrame, n: int, columns: str, order_by
  filtered_df = df[df[current_col] == value]

  if len(columns) > 1:
- sub_report = generate_cascade_report(filtered_df, columns[1:], limit, order_by)
- report[value] = {"count": str(count), f"sub_distribution({columns[1]})": sub_report if sub_report else {}}
+ sub_report = generate_cascade_report(
+ filtered_df, columns[1:], limit, order_by)
+ report[value] = {
+ "count": str(count), f"sub_distribution({
+ columns[1]})": sub_report if sub_report else {}}
  else:
  report[value] = {"count": str(count)}

@@ -1511,17 +1655,29 @@ def print_n_frequency_cascading(df: pd.DataFrame, n: int, columns: str, order_by
  if order_by == "ASC":
  return dict(sorted(frequency.items(), key=lambda item: item[0]))
  elif order_by == "DESC":
- return dict(sorted(frequency.items(), key=lambda item: item[0], reverse=True))
+ return dict(
+ sorted(
+ frequency.items(),
+ key=lambda item: item[0],
+ reverse=True))
  elif order_by == "FREQ_ASC":
  return dict(sorted(frequency.items(), key=lambda item: item[1]))
  else:  # Default to "FREQ_DESC"
- return dict(sorted(frequency.items(), key=lambda item: item[1], reverse=True))
+ return dict(
+ sorted(
+ frequency.items(),
+ key=lambda item: item[1],
+ reverse=True))

  report = generate_cascade_report(df, columns, n, order_by)
  print(json.dumps(report, indent=2))


- def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: str, order_by: str = "FREQ_DESC") -> None:
+ def print_n_frequency_linear(
+ df: pd.DataFrame,
+ n: int,
+ columns: str,
+ order_by: str = "FREQ_DESC") -> None:
  """
  Print the linear frequency of top n values for specified columns.

@@ -1541,13 +1697,19 @@ def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: str, order_by: s
  continue

  frequency = df[current_col].astype(str).value_counts(dropna=False)
- frequency = frequency.rename(index={'nan': 'NaN', 'NaT': 'NaT', 'None': 'None', '': 'Empty'})
+ frequency = frequency.rename(
+ index={
+ 'nan': 'NaN',
+ 'NaT': 'NaT',
+ 'None': 'None',
+ '': 'Empty'})

  if limit is not None:
  frequency = frequency.nlargest(limit)

  sorted_frequency = sort_frequency(frequency, order_by)
- col_report = {str(value): str(count) for value, count in sorted_frequency.items()}
+ col_report = {str(value): str(count)
+ for value, count in sorted_frequency.items()}
  report[current_col] = col_report

  return report
@@ -1556,17 +1718,27 @@ def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: str, order_by: s
  if order_by == "ASC":
  return dict(sorted(frequency.items(), key=lambda item: item[0]))
  elif order_by == "DESC":
- return dict(sorted(frequency.items(), key=lambda item: item[0], reverse=True))
+ return dict(
+ sorted(
+ frequency.items(),
+ key=lambda item: item[0],
+ reverse=True))
  elif order_by == "FREQ_ASC":
  return dict(sorted(frequency.items(), key=lambda item: item[1]))
  else:  # Default to "FREQ_DESC"
- return dict(sorted(frequency.items(), key=lambda item: item[1], reverse=True))
+ return dict(
+ sorted(
+ frequency.items(),
+ key=lambda item: item[1],
+ reverse=True))

  report = generate_linear_report(df, columns, n, order_by)
  print(json.dumps(report, indent=2))


- def retain_columns(df: pd.DataFrame, columns_to_retain: List[str]) -> pd.DataFrame:
+ def retain_columns(
+ df: pd.DataFrame,
+ columns_to_retain: List[str]) -> pd.DataFrame:
  """
  Retain specified columns in the DataFrame and drop the others.

@@ -1582,7 +1754,10 @@ def retain_columns(df: pd.DataFrame, columns_to_retain: List[str]) -> pd.DataFra
  return df[columns_to_retain]


- def mask_against_dataframe(df: pd.DataFrame, other_df: pd.DataFrame, column_name: str) -> pd.DataFrame:
+ def mask_against_dataframe(
+ df: pd.DataFrame,
+ other_df: pd.DataFrame,
+ column_name: str) -> pd.DataFrame:
  """
  Retain only rows with common column values between two DataFrames.

@@ -1599,7 +1774,10 @@ def mask_against_dataframe(df: pd.DataFrame, other_df: pd.DataFrame, column_name
  return df[df[column_name].isin(other_df[column_name])]


- def mask_against_dataframe_converse(df: pd.DataFrame, other_df: pd.DataFrame, column_name: str) -> pd.DataFrame:
+ def mask_against_dataframe_converse(
+ df: pd.DataFrame,
+ other_df: pd.DataFrame,
+ column_name: str) -> pd.DataFrame:
  """
  Retain only rows with uncommon column values between two DataFrames.

@@ -1633,7 +1811,8 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  ValueError: If the DataFrames do not have the same columns.
  """
  if set(df1.columns) != set(df2.columns):
- raise ValueError("Both DataFrames must have the same columns for a union join")
+ raise ValueError(
+ "Both DataFrames must have the same columns for a union join")

  result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
  return result_df
@@ -1654,13 +1833,18 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  ValueError: If the DataFrames do not have the same columns.
  """
  if set(df1.columns) != set(df2.columns):
- raise ValueError("Both DataFrames must have the same columns for a bag union join")
+ raise ValueError(
+ "Both DataFrames must have the same columns for a bag union join")

  result_df = pd.concat([df1, df2], ignore_index=True)
  return result_df


- def left_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str) -> pd.DataFrame:
+ def left_join(
+ df1: pd.DataFrame,
+ df2: pd.DataFrame,
+ left_on: str,
+ right_on: str) -> pd.DataFrame:
  """
  Perform a left join on two DataFrames.

@@ -1676,7 +1860,11 @@ def left_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str)
  return df1.merge(df2, how='left', left_on=left_on, right_on=right_on)


- def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str) -> pd.DataFrame:
+ def right_join(
+ df1: pd.DataFrame,
+ df2: pd.DataFrame,
+ left_on: str,
+ right_on: str) -> pd.DataFrame:
  """
  Perform a right join on two DataFrames.

@@ -1692,7 +1880,72 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
  return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)


- def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
+ def insert_dataframe_in_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
+ """
+ Inserts a Pandas DataFrame into a SQLite database table.
+
+ Parameters:
+ db_path: str
+ The file path to the SQLite database. If the database does not exist,
+ it will be created.
+
+ tablename: str
+ The name of the table where the data will be inserted. If the table does
+ not exist, it will be created based on the DataFrame's columns and types.
+
+ df: pd.DataFrame
+ The DataFrame containing the data to be inserted into the database.
+
+ Functionality:
+ - Checks if the specified table exists in the database.
+ - Creates the table with appropriate column types if it doesn't exist.
+ - Inserts the DataFrame's data into the table, appending to any existing data.
+
+ Data Type Mapping:
+ - Converts Pandas data types to SQLite types: 'int64' to 'INTEGER',
+ 'float64' to 'REAL', 'object' to 'TEXT', 'datetime64[ns]' to 'TEXT',
+ and 'bool' to 'INTEGER'.
+
+ Returns:
+ None
+ """
+
+ def table_exists(cursor, table_name):
+ cursor.execute(
+ f"SELECT count(name) FROM sqlite_master WHERE type='table' AND name='{table_name}'")
+ return cursor.fetchone()[0] == 1
+
+ dtype_mapping = {
+ 'int64': 'INTEGER',
+ 'float64': 'REAL',
+ 'object': 'TEXT',
+ 'datetime64[ns]': 'TEXT',
+ 'bool': 'INTEGER',
+ }
+
+ def map_dtype(dtype):
+ return dtype_mapping.get(str(dtype), 'TEXT')
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+
+ if not table_exists(cursor, tablename):
+ columns_with_types = ', '.join(
+ f'"{col}" {
+ map_dtype(dtype)}' for col,
+ dtype in zip(
+ df.columns,
+ df.dtypes))
+ create_table_query = f'CREATE TABLE "{tablename}" ({columns_with_types})'
+ conn.execute(create_table_query)
+
+ df.to_sql(tablename, conn, if_exists='append', index=False)
+
+
+ def sync_dataframe_to_sqlite_database(
+ db_path: str,
+ tablename: str,
+ df: pd.DataFrame) -> None:
  """
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column
  and replacing the existing table if needed. Creates the table if it does not exist.
@@ -1702,6 +1955,10 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
  - tablename (str): The name of the table in the database.
  - df (pd.DataFrame): The DataFrame to be processed and saved.
  """
+ # Helper function to map pandas dtype to SQLite type
+ def map_dtype(dtype):
+ return dtype_mapping.get(str(dtype), 'TEXT')
+
  # Step 1: Add a timestamp column to the dataframe
  df['rgwfuncs_sync_timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

@@ -1714,10 +1971,6 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
  'bool': 'INTEGER',  # SQLite does not have a separate Boolean storage class
  }

- # Helper function to map pandas dtype to SQLite type
- def map_dtype(dtype):
- return dtype_mapping.get(str(dtype), 'TEXT')
-
  # Step 2: Save df in SQLite3 db as '{tablename}_new'
  with sqlite3.connect(db_path) as conn:
  new_table_name = f"{tablename}_new"
@@ -1728,8 +1981,11 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
  if cursor.fetchall() == []:  # Table does not exist
  # Create a table using the DataFrame's column names and types
  columns_with_types = ', '.join(
- f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes)
- )
+ f'"{col}" {
+ map_dtype(dtype)}' for col,
+ dtype in zip(
+ df.columns,
+ df.dtypes))
  create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
  conn.execute(create_table_query)

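The two SQLite helpers now cover complementary cases: `insert_dataframe_in_sqlite_database` appends to (or creates) a table, while `sync_dataframe_to_sqlite_database` adds an `rgwfuncs_sync_timestamp` column and replaces the table. A minimal sketch (file and table names are placeholders):

    import pandas as pd
    from rgwfuncs import (insert_dataframe_in_sqlite_database,
                          sync_dataframe_to_sqlite_database)

    df = pd.DataFrame({'ID': [1, 2], 'Score': [88.5, 92.3]})
    insert_dataframe_in_sqlite_database('my.db', 'scores', df)       # append rows
    sync_dataframe_to_sqlite_database('my.db', 'scores_synced', df)  # timestamp + replace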
rgwfuncs/str_lib.py ADDED
@@ -0,0 +1,62 @@
+ import os
+ import json
+ import requests
+ from typing import Tuple
+
+ def send_telegram_message(preset_name: str, message: str) -> None:
+ """Send a Telegram message using the specified preset.
+
+ Args:
+ preset_name (str): The name of the preset to use for sending the message.
+ message (str): The message to send.
+
+ Raises:
+ RuntimeError: If the preset is not found or necessary details are missing.
+ """
+
+ # Set the config path to ~/.rgwfuncsrc
+ config_path = os.path.expanduser("~/.rgwfuncsrc")
+
+ def load_config() -> dict:
+ """Load the configuration from the .rgwfuncsrc file."""
+ with open(config_path, 'r') as file:
+ return json.load(file)
+
+ def get_telegram_preset(config: dict, preset_name: str) -> dict:
+ """Get the Telegram preset configuration."""
+ presets = config.get("telegram_bot_presets", [])
+ for preset in presets:
+ if preset.get("name") == preset_name:
+ return preset
+ return None
+
+ def get_telegram_bot_details(config: dict, preset_name: str) -> Tuple[str, str]:
+ """Retrieve the Telegram bot token and chat ID from the preset."""
+ preset = get_telegram_preset(config, preset_name)
+ if not preset:
+ raise RuntimeError(f"Telegram bot preset '{preset_name}' not found in the configuration file")
+
+ bot_token = preset.get("bot_token")
+ chat_id = preset.get("chat_id")
+
+ if not bot_token or not chat_id:
+ raise RuntimeError(
+ f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file"
+ )
+
+ return bot_token, chat_id
+
+ # Load the configuration
+ config = load_config()
+
+ # Get bot details from the configuration
+ bot_token, chat_id = get_telegram_bot_details(config, preset_name)
+
+ # Prepare the request
+ url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
+ payload = {"chat_id": chat_id, "text": message}
+
+ # Send the message
+ response = requests.post(url, json=payload)
+ response.raise_for_status()
+
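A minimal usage sketch for the new module, assuming a `telegram_bot_presets` entry named `my_tg_bot` (hypothetical) exists in `~/.rgwfuncsrc`:

    from rgwfuncs import send_telegram_message

    # Raises RuntimeError if the preset or its bot_token/chat_id is missing,
    # and an HTTPError via raise_for_status() if Telegram rejects the call.
    send_telegram_message('my_tg_bot', 'Build finished')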
rgwfuncs-0.0.18.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: rgwfuncs
- Version: 0.0.16
+ Version: 0.0.18
  Summary: A functional programming paradigm for mathematical modelling and data science
  Home-page: https://github.com/ryangerardwilson/rgwfunc
  Author: Ryan Gerard Wilson
@@ -40,9 +40,9 @@ Install the package using:

  --------------------------------------------------------------------------------

- ## Create a `rgwml.config` File
+ ## Create a `.rgwfuncsrc` File

- A `rgwml.config` file (located at `vi ~/Documents/rgwml.config) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
+ A `.rgwfuncsrc` file (located at `~/.rgwfuncsrc`) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.

  {
  "db_presets" : [
@@ -381,28 +381,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
  --------------------------------------------------------------------------------

  ### 12. `load_data_from_query`
+
  Load data from a database query into a DataFrame based on a configuration preset.

- Parameters:
- - `db_preset_name` (str): Name of the database preset in the config file.
- - query (str): The SQL query to execute.
- - `config_file_name` (str): Name of the configuration file (default: "rgwml.config").
+ - **Parameters:**
+ - `db_preset_name` (str): Name of the database preset in the configuration file.
+ - `query` (str): The SQL query to execute.

- Returns:
- - pd.DataFrame: A DataFrame containing the query result.
+ - **Returns:**
+ - `pd.DataFrame`: A DataFrame containing the query result.

- Example:
-
- from rgwfuncs import load_data_from_query
+ - **Notes:**
+ - The configuration file is assumed to be located at `~/.rgwfuncsrc`.

- df = load_data_from_query(
- db_preset_name="MyDBPreset",
- query="SELECT * FROM my_table",
- config_file_name="rgwml.config"
- )
- print(df)
-
+ - **Example:**
+
+ from rgwfuncs import load_data_from_query
+
+ df = load_data_from_query(
+ db_preset_name="MyDBPreset",
+ query="SELECT * FROM my_table"
+ )
+ print(df)

+
  --------------------------------------------------------------------------------

  ### 13. `load_data_from_path`
@@ -1148,10 +1150,47 @@ Perform a right join on two DataFrames.
  df_right_join = right_join(df1, df2, 'ID', 'ID')
  print(df_right_join)

+ --------------------------------------------------------------------------------
+
+ ### 45. `insert_dataframe_in_sqlite_database`
+
+ Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
+
+ - **Parameters:**
+ - `db_path` (str): The path to the SQLite database file. If the database does not exist, it will be created.
+ - `tablename` (str): The name of the table in the database. If the table does not exist, it is created with the DataFrame's columns and data types.
+ - `df` (pd.DataFrame): The DataFrame containing the data to be inserted into the database table.
+
+ - **Returns:**
+ - `None`
+
+ - **Notes:**
+ - Data types in the DataFrame are converted to SQLite-compatible types:
+ - `int64` is mapped to `INTEGER`
+ - `float64` is mapped to `REAL`
+ - `object` is mapped to `TEXT`
+ - `datetime64[ns]` is mapped to `TEXT` (dates are stored as text)
+ - `bool` is mapped to `INTEGER` (SQLite does not have a separate Boolean type)
+
+ - **Example:**
+
+ from rgwfuncs import insert_dataframe_in_sqlite_database
+ import pandas as pd
+
+ df = pd.DataFrame({
+ 'ID': [1, 2, 3],
+ 'Name': ['Alice', 'Bob', 'Charlie'],
+ 'Score': [88.5, 92.3, 85.0]
+ })
+
+ db_path = 'my_database.db'
+ tablename = 'students'
+
+ insert_dataframe_in_sqlite_database(db_path, tablename, df)

  --------------------------------------------------------------------------------

- ### 45. `sync_dataframe_to_sqlite_database`
+ ### 46. `sync_dataframe_to_sqlite_database`
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.

  • Parameters:
rgwfuncs-0.0.18.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ rgwfuncs/__init__.py,sha256=XqJ8TJuc4HkQq3T5Gzjf3KTBsdJtyi2NKXBgbPuDn0Y,1156
+ rgwfuncs/df_lib.py,sha256=rY1yVvY04uqR174JwYBFiRnujekr9mbe258wmu9OeeY,67148
+ rgwfuncs/str_lib.py,sha256=6v9AXZ5wWsWVEcvcIz0B1rTmsvYaD-v53r2sYPcV4pU,2109
+ rgwfuncs-0.0.18.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
+ rgwfuncs-0.0.18.dist-info/METADATA,sha256=GfMK-J1vH4CG_fQqQAWwAvDE6JcSqNrKuNKvfOUKV_E,33442
+ rgwfuncs-0.0.18.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ rgwfuncs-0.0.18.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
+ rgwfuncs-0.0.18.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
+ rgwfuncs-0.0.18.dist-info/RECORD,,
rgwfuncs-0.0.16.dist-info/RECORD REMOVED
@@ -1,8 +0,0 @@
- rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
- rgwfuncs/df_lib.py,sha256=OZPI7M35mbue6YsieWmlzjM5RUkaow0v0d3P-V71L6o,63034
- rgwfuncs-0.0.16.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
- rgwfuncs-0.0.16.dist-info/METADATA,sha256=oKTScVPzrgTTWdCQ7vxEdKYRnc-S_90hKwefifayeDU,32059
- rgwfuncs-0.0.16.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- rgwfuncs-0.0.16.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
- rgwfuncs-0.0.16.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
- rgwfuncs-0.0.16.dist-info/RECORD,,