PyPI - rgwfuncs - Versions diffs - 0.0.21__py3-none-any.whl → 0.0.54__py3-none-any.whl - Mend

rgwfuncs 0.0.21__py3-none-any.whl → 0.0.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

rgwfuncs/__init__.py +5 -2
rgwfuncs/algebra_lib.py +901 -0
rgwfuncs/df_lib.py +111 -61
rgwfuncs/docs_lib.py +51 -0
rgwfuncs/interactive_shell_lib.py +32 -0
rgwfuncs/str_lib.py +8 -44
{rgwfuncs-0.0.21.dist-info → rgwfuncs-0.0.54.dist-info}/METADATA +517 -92
rgwfuncs-0.0.54.dist-info/RECORD +12 -0
rgwfuncs-0.0.21.dist-info/RECORD +0 -9
{rgwfuncs-0.0.21.dist-info → rgwfuncs-0.0.54.dist-info}/LICENSE +0 -0
{rgwfuncs-0.0.21.dist-info → rgwfuncs-0.0.54.dist-info}/WHEEL +0 -0
{rgwfuncs-0.0.21.dist-info → rgwfuncs-0.0.54.dist-info}/entry_points.txt +0 -0
{rgwfuncs-0.0.21.dist-info → rgwfuncs-0.0.54.dist-info}/top_level.txt +0 -0

rgwfuncs/df_lib.py CHANGED Viewed

@@ -21,51 +21,15 @@ from email.mime.base import MIMEBase
 from email import encoders
 from googleapiclient.discovery import build
 import base64
-import inspect
-from typing import Optional, Callable, Dict, List, Tuple, Any
+import boto3
+# import inspect
+from typing import Optional, Dict, List, Tuple, Any
 import warnings
 # Suppress all FutureWarnings
 warnings.filterwarnings("ignore", category=FutureWarning)
-def df_docs(method_type_filter: Optional[str] = None) -> None:
-    """
-    Print a list of function names in alphabetical order. If method_type_filter
-    is specified, print the docstrings of the functions that match the filter.
-    Using '*' as a filter will print the docstrings for all functions.
-    Parameters:
-        method_type_filter: Optional filter string representing a function name,
-        or '*' to display docstrings for all functions.
-    """
-    # Get the current module's namespace
-    current_module = __name__
-    local_functions: Dict[str, Callable] = {
-        name: obj for name, obj in globals().items()
-        if inspect.isfunction(obj) and obj.__module__ == current_module
-    }
-    # List of function names sorted alphabetically
-    function_names = sorted(local_functions.keys())
-    # Print function names
-    print("Functions in alphabetical order:")
-    for name in function_names:
-        print(name)
-    # If a filter is provided or '*', print the docstrings of functions
-    if method_type_filter:
-        # print("\nFiltered function documentation:")
-        for name, func in local_functions.items():
-            docstring: Optional[str] = func.__doc__
-            if docstring:
-                if method_type_filter == '*' or method_type_filter == name:
-                    # Print the entire docstring for the matching function
-                    print(f"\n{name}:\n{docstring}")
 def numeric_clean(
         df: pd.DataFrame,
         column_names: str,
@@ -421,8 +385,7 @@ def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
                     raise ConnectionError(
                         "All attempts to connect to ClickHouse failed.")
-    def query_google_big_query(
-            db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
+    def query_google_big_query(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
         json_file_path = db_preset['json_file_path']
         project_id = db_preset['project_id']
@@ -437,6 +400,54 @@ def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
         return pd.DataFrame(rows, columns=columns)
+    def query_athena(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
+        """
+        Run a SQL query on AWS Athena and return the result as a DataFrame.
+        Parameters:
+            db_preset: Preset dictionary; the keys read here are 'aws_region',
+            'database', 'output_bucket', 'aws_access_key' and 'aws_secret_key'.
+            query: The SQL text to execute.
+        Returns:
+            A DataFrame with one column per Athena result column. Every cell is
+            taken from 'VarCharValue', so values are strings (or None when the
+            key is absent); no type conversion is performed here.
+        Raises:
+            Exception: If the query ends in the FAILED or CANCELLED state.
+        """
+        def execute_athena_query(athena_client, query: str, database: str, output_bucket: str) -> str:
+            # Submit the query and hand back the execution id used for polling.
+            response = athena_client.start_query_execution(
+                QueryString=query,
+                QueryExecutionContext={"Database": database},
+                ResultConfiguration={"OutputLocation": output_bucket}
+            )
+            return response["QueryExecutionId"]
+        def wait_for_athena_query_to_complete(athena_client, query_execution_id: str):
+            # Poll once per second until the query reaches a terminal state.
+            # NOTE(review): there is no timeout, so a query that never leaves a
+            # non-terminal state blocks forever.
+            while True:
+                response = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
+                state = response["QueryExecution"]["Status"]["State"]
+                if state == "SUCCEEDED":
+                    break
+                elif state in ("FAILED", "CANCELLED"):
+                    # NOTE(review): only the state is reported; any failure
+                    # reason carried in the response is not surfaced.
+                    raise Exception(f"Query failed with state: {state}")
+                time.sleep(1)
+        def download_athena_query_results(athena_client, query_execution_id: str) -> pd.DataFrame:
+            # Walk every result page, accumulating all rows in memory.
+            paginator = athena_client.get_paginator("get_query_results")
+            result_pages = paginator.paginate(QueryExecutionId=query_execution_id)
+            rows = []
+            columns = []
+            for page in result_pages:
+                # Column names are taken once, from the first page seen.
+                if not columns:
+                    columns = [col["Name"] for col in page["ResultSet"]["ResultSetMetadata"]["ColumnInfo"]]
+                rows.extend(page["ResultSet"]["Rows"])
+            # The very first accumulated row is dropped — presumably the header
+            # row Athena returns for SELECT results; verify for other statement
+            # types, where a real data row could be discarded.
+            data = [[col.get("VarCharValue", None) for col in row["Data"]] for row in rows[1:]]
+            return pd.DataFrame(data, columns=columns)
+        aws_region = db_preset['aws_region']
+        database = db_preset['database']
+        output_bucket = db_preset['output_bucket']
+        # Credentials come straight from the preset rather than the default
+        # boto3 credential chain.
+        athena_client = boto3.client(
+            'athena',
+            region_name=aws_region,
+            aws_access_key_id=db_preset['aws_access_key'],
+            aws_secret_access_key=db_preset['aws_secret_key']
+        )
+        # Submit, wait for completion, then fetch the full result set.
+        query_execution_id = execute_athena_query(athena_client, query, database, output_bucket)
+        wait_for_athena_query_to_complete(athena_client, query_execution_id)
+        return download_athena_query_results(athena_client, query_execution_id)
     # Assume the configuration file is located at ~/.rgwfuncsrc
     config_path = os.path.expanduser('~/.rgwfuncsrc')
     with open(config_path, 'r') as f:
@@ -459,6 +470,8 @@ def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
         return query_clickhouse(db_preset, query)
     elif db_type == 'google_big_query':
         return query_google_big_query(db_preset, query)
+    elif db_type == 'aws_athena':
+        return query_athena(db_preset, query)
     else:
         raise ValueError(f"Unsupported db_type: {db_type}")
@@ -835,7 +848,12 @@ def print_dataframe(df: pd.DataFrame, source: Optional[str] = None) -> None:
     gc.collect()
-def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Optional[str] = None, as_file: bool = True, remove_after_send: bool = True) -> None:
+def send_dataframe_via_telegram(
+        df: pd.DataFrame,
+        bot_name: str,
+        message: Optional[str] = None,
+        as_file: bool = True,
+        remove_after_send: bool = True) -> None:
     """
     Send a DataFrame via Telegram using a specified bot configuration.
@@ -1673,7 +1691,11 @@ def print_n_frequency_cascading(
     print(json.dumps(report, indent=2))
-def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: list, order_by: str = "FREQ_DESC") -> None:
+def print_n_frequency_linear(
+        df: pd.DataFrame,
+        n: int,
+        columns: list,
+        order_by: str = "FREQ_DESC") -> None:
     """
     Print the linear frequency of top n values for specified columns.
@@ -1709,27 +1731,49 @@ def print_n_frequency_linear(df: pd.DataFrame, n: int, columns: list, order_by:
         return report
+    def try_parse_numeric(val):
+        """Attempt to parse a value as an integer or float."""
+        try:
+            return int(val)
+        except ValueError:
+            try:
+                return float(val)
+            except ValueError:
+                return val
     def sort_frequency(frequency, order_by):
-        if order_by == "ASC":
-            return dict(sorted(frequency.items(), key=lambda item: item[0]))
-        elif order_by == "DESC":
-            return dict(
-                sorted(
+        # keys = frequency.keys()
+        # Convert keys to numerical values where possible, leaving `NaN` as a
+        # special string
+        # parsed_keys = [(try_parse_numeric(key), key) for key in keys]
+        if order_by in {"BY_KEYS_ASC", "BY_KEYS_DESC"}:
+            reverse = order_by == "BY_KEYS_DESC"
+            sorted_items = sorted(
+                frequency.items(),
+                key=lambda item: try_parse_numeric(
+                    item[0]),
+                reverse=reverse)
+        else:
+            if order_by == "ASC":
+                sorted_items = sorted(
+                    frequency.items(), key=lambda item: item[0])
+            elif order_by == "DESC":
+                sorted_items = sorted(
                     frequency.items(),
                     key=lambda item: item[0],
-                    reverse=True))
-        elif order_by == "FREQ_ASC":
-            return dict(sorted(frequency.items(), key=lambda item: item[1]))
-        elif order_by == "BY_KEYS_ASC":
-            return dict(sorted(frequency.items()))
-        elif order_by == "BY_KEYS_DESC":
-            return dict(sorted(frequency.items(), reverse=True))
-        else:  # Default to "FREQ_DESC"
-            return dict(
-                sorted(
+                    reverse=True)
+            elif order_by == "FREQ_ASC":
+                sorted_items = sorted(
+                    frequency.items(), key=lambda item: item[1])
+            else:  # Default to "FREQ_DESC"
+                sorted_items = sorted(
                     frequency.items(),
                     key=lambda item: item[1],
-                    reverse=True))
+                    reverse=True)
+        return dict(sorted_items)
     report = generate_linear_report(df, columns, n, order_by)
     print(json.dumps(report, indent=2))
@@ -1879,7 +1923,10 @@ def right_join(
     return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
-def insert_dataframe_in_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
+def insert_dataframe_in_sqlite_database(
+        db_path: str,
+        tablename: str,
+        df: pd.DataFrame) -> None:
     """
     Inserts a Pandas DataFrame into a SQLite database table.
@@ -1941,7 +1988,10 @@ def insert_dataframe_in_sqlite_database(db_path: str, tablename: str, df: pd.Dat
         df.to_sql(tablename, conn, if_exists='append', index=False)
-def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
+def sync_dataframe_to_sqlite_database(
+        db_path: str,
+        tablename: str,
+        df: pd.DataFrame) -> None:
     """
     Processes and saves a DataFrame to an SQLite database, adding a timestamp column
     and replacing the existing table if needed. Creates the table if it does not exist.

rgwfuncs/docs_lib.py ADDED Viewed

@@ -0,0 +1,51 @@
+import os
+import inspect
+from typing import Optional
+import warnings
+# Suppress all FutureWarnings
+warnings.filterwarnings("ignore", category=FutureWarning)
+def docs(method_type_filter: Optional[str] = None) -> None:
+    """
+    Print a list of function names in alphabetical order from all modules.
+    If method_type_filter is specified, print the docstrings of the functions
+    that match the filter based on a substring. Using '*' as a filter will print
+    the docstrings for all functions.
+    Parameters:
+        method_type_filter: Optional filter string representing a filter for
+        function names, or '*' to display docstrings for all functions.
+    """
+    # Directory containing your modules
+    module_dir = os.path.dirname(__file__)
+    # Iterate over each file in the module directory
+    for filename in sorted(os.listdir(module_dir)):
+        if filename.endswith('.py') and filename != '__init__.py':
+            module_name, _ = os.path.splitext(filename)
+            print(f"\n# {module_name}.py")
+            # Import the module
+            module_path = f"rgwfuncs.{module_name}"
+            module = __import__(module_path, fromlist=[module_name])
+            # Get all functions from the module
+            functions = {
+                name: obj for name, obj
+                in inspect.getmembers(module, inspect.isfunction)
+                if obj.__module__ == module_path
+            }
+            # List function names
+            function_names = sorted(functions.keys())
+            for name in function_names:
+                # If a filter is provided or '*', check if the function name
+                # contains the filter
+                if method_type_filter and (
+                        method_type_filter == '*' or method_type_filter in name):
+                    docstring: Optional[str] = functions[name].__doc__
+                    if docstring:
+                        print(f"\n{name}:\n{docstring}")

rgwfuncs/interactive_shell_lib.py ADDED Viewed

@@ -0,0 +1,32 @@
+import code
+import readline
+import rlcompleter  # noqa: F401
+import sys  # noqa: F401
+from typing import Dict, Any
+from .df_lib import *  # noqa: F401, F403, E402
+from .algebra_lib import *  # noqa: F401, F403, E402
+from .str_lib import *  # noqa: F401, F403, E402
+from .docs_lib import *  # noqa: F401, F403, E402
+def interactive_shell(local_vars: Dict[str, Any]) -> None:
+    """
+    Launches an interactive prompt for inspecting and modifying local variables, making all methods
+    in the rgwfuncs library available by default.
+    Parameters:
+        local_vars (dict): Dictionary of local variables to be available in the interactive shell.
+    """
+    if not isinstance(local_vars, dict):
+        raise TypeError("local_vars must be a dictionary")
+    readline.parse_and_bind("tab: complete")
+    # Make imported functions available in the REPL
+    local_vars.update(globals())
+    # Create interactive console with local context
+    console = code.InteractiveConsole(locals=local_vars)
+    # Start interactive session
+    console.interact(banner="Welcome to the rgwfuncs interactive shell.")

rgwfuncs/str_lib.py CHANGED Viewed

@@ -1,53 +1,16 @@
 import os
 import json
 import requests
-import inspect
-from typing import Tuple, Optional, Dict, Callable
+from typing import Tuple
 import warnings
 # Suppress all FutureWarnings
 warnings.filterwarnings("ignore", category=FutureWarning)
-def str_docs(method_type_filter: Optional[str] = None) -> None:
-    """
-    Print a list of function names in alphabetical order. If method_type_filter
-    is specified, print the docstrings of the functions that match the filter.
-    Using '*' as a filter will print the docstrings for all functions.
-    Parameters:
-        method_type_filter: Optional filter string representing a function name,
-        or '*' to display docstrings for all functions.
-    """
-    # Get the current module's namespace
-    current_module = __name__
-    local_functions: Dict[str, Callable] = {
-        name: obj for name, obj in globals().items()
-        if inspect.isfunction(obj) and obj.__module__ == current_module
-    }
-    # List of function names sorted alphabetically
-    function_names = sorted(local_functions.keys())
-    # Print function names
-    print("Functions in alphabetical order:")
-    for name in function_names:
-        print(name)
-    # If a filter is provided or '*', print the docstrings of functions
-    if method_type_filter:
-        # print("\nFiltered function documentation:")
-        for name, func in local_functions.items():
-            docstring: Optional[str] = func.__doc__
-            if docstring:
-                if method_type_filter == '*' or method_type_filter == name:
-                    # Print the entire docstring for the matching function
-                    print(f"\n{name}:\n{docstring}")
 def send_telegram_message(preset_name: str, message: str) -> None:
-    """Send a Telegram message using the specified preset.
+    """
+    Send a Telegram message using the specified preset.
     Args:
         preset_name (str): The name of the preset to use for sending the message.
@@ -73,19 +36,20 @@ def send_telegram_message(preset_name: str, message: str) -> None:
                 return preset
         return None
-    def get_telegram_bot_details(config: dict, preset_name: str) -> Tuple[str, str]:
+    def get_telegram_bot_details(
+            config: dict, preset_name: str) -> Tuple[str, str]:
         """Retrieve the Telegram bot token and chat ID from the preset."""
         preset = get_telegram_preset(config, preset_name)
         if not preset:
-            raise RuntimeError(f"Telegram bot preset '{preset_name}' not found in the configuration file")
+            raise RuntimeError(
+                f"Telegram bot preset '{preset_name}' not found in the configuration file")
         bot_token = preset.get("bot_token")
         chat_id = preset.get("chat_id")
         if not bot_token or not chat_id:
             raise RuntimeError(
-                f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file"
-            )
+                f"Telegram bot token or chat ID for '{preset_name}' not found in the configuration file")
         return bot_token, chat_id

rgwfuncs 0.0.21__py3-none-any.whl → 0.0.54__py3-none-any.whl

rgwfuncs 0.0.21__py3-none-any.whl → 0.0.54__py3-none-any.whl