rgwfuncs 0.0.63__py3-none-any.whl → 0.0.65__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- rgwfuncs/__init__.py +2 -2
- rgwfuncs/df_lib.py +67 -6
- {rgwfuncs-0.0.63.dist-info → rgwfuncs-0.0.65.dist-info}/METADATA +28 -1
- rgwfuncs-0.0.65.dist-info/RECORD +12 -0
- rgwfuncs-0.0.63.dist-info/RECORD +0 -12
- {rgwfuncs-0.0.63.dist-info → rgwfuncs-0.0.65.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.63.dist-info → rgwfuncs-0.0.65.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.63.dist-info → rgwfuncs-0.0.65.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.63.dist-info → rgwfuncs-0.0.65.dist-info}/top_level.txt +0 -0
rgwfuncs/__init__.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# This file is automatically generated
|
2
2
|
# Dynamically importing functions from modules
|
3
3
|
|
4
|
+
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, load_fresh_data_or_pull_from_cache, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
|
5
|
+
from .interactive_shell_lib import interactive_shell
|
4
6
|
from .algebra_lib import cancel_polynomial_expression, compute_constant_expression, compute_constant_expression_involving_matrices, compute_constant_expression_involving_ordered_series, compute_prime_factors, expand_polynomial_expression, factor_polynomial_expression, plot_polynomial_functions, plot_x_points_of_polynomial_functions, python_polynomial_expression_to_latex, simplify_polynomial_expression, solve_homogeneous_polynomial_expression
|
5
|
-
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
|
6
7
|
from .docs_lib import docs
|
7
|
-
from .interactive_shell_lib import interactive_shell
|
8
8
|
from .str_lib import send_telegram_message
|
rgwfuncs/df_lib.py
CHANGED
@@ -23,7 +23,7 @@ from googleapiclient.discovery import build
|
|
23
23
|
import base64
|
24
24
|
import boto3
|
25
25
|
# import inspect
|
26
|
-
from typing import Optional, Dict, List, Tuple, Any
|
26
|
+
from typing import Optional, Dict, List, Tuple, Any, Callable
|
27
27
|
import warnings
|
28
28
|
|
29
29
|
# Suppress all FutureWarnings
|
@@ -685,7 +685,7 @@ def bottom_n_unique_values(
|
|
685
685
|
report[column] = {
|
686
686
|
str(value): str(count) for value,
|
687
687
|
count in bottom_n_values.items()}
|
688
|
-
print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
|
688
|
+
print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
|
689
689
|
else:
|
690
690
|
print(f"Column '{column}' does not exist in the DataFrame.")
|
691
691
|
else:
|
@@ -1221,7 +1221,7 @@ def append_ranged_classification_column(
|
|
1221
1221
|
for r in range_list
|
1222
1222
|
)
|
1223
1223
|
|
1224
|
-
labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
|
1224
|
+
labels = [f"{pad_number(range_list[i], max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
|
1225
1225
|
|
1226
1226
|
# Ensure the target column is numeric
|
1227
1227
|
df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
|
@@ -1946,8 +1946,7 @@ def insert_dataframe_in_sqlite_database(
|
|
1946
1946
|
cursor = conn.cursor()
|
1947
1947
|
|
1948
1948
|
if not table_exists(cursor, tablename):
|
1949
|
-
columns_with_types = ', '.join(
|
1950
|
-
f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes))
|
1949
|
+
columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes))
|
1951
1950
|
create_table_query = f'CREATE TABLE "{tablename}" ({columns_with_types})'
|
1952
1951
|
conn.execute(create_table_query)
|
1953
1952
|
|
@@ -1992,7 +1991,7 @@ def sync_dataframe_to_sqlite_database(
|
|
1992
1991
|
cursor.execute(f"PRAGMA table_info({new_table_name})")
|
1993
1992
|
if cursor.fetchall() == []: # Table does not exist
|
1994
1993
|
# Create a table using the DataFrame's column names and types
|
1995
|
-
columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
|
1994
|
+
columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes))
|
1996
1995
|
create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
|
1997
1996
|
conn.execute(create_table_query)
|
1998
1997
|
|
@@ -2009,3 +2008,65 @@ def sync_dataframe_to_sqlite_database(
|
|
2009
2008
|
conn.execute(f"DROP TABLE IF EXISTS {tablename}")
|
2010
2009
|
# Rename the new table to the old table name
|
2011
2010
|
conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
|
2011
|
+
|
2012
|
+
|
2013
|
+
def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
    """
    Return cached data when a recent enough cache file exists; otherwise fetch fresh data, cache it, and return it.

    Cache files live in `cache_dir` and are named `<file_prefix>_<YYYYmmddHHMMSS>.csv`.
    The timestamp embedded in the filename (not the file's modification time) decides how old a cache file is.
    Only files whose name is exactly the prefix, an underscore, a parsable timestamp and `.csv` are considered,
    so caches that belong to a different prefix sharing the same leading characters are never picked up.

    Parameters:
        - fetch_func (typing.Callable[[], pd.DataFrame]):
            A callable that takes no arguments and returns a pandas DataFrame with fresh data.
            It is only called when no sufficiently recent cache file is found.
        - cache_dir (str):
            The directory where cache files are stored. It is created if it does not exist.
        - file_prefix (str):
            The prefix used for cache filenames to identify relevant cache files.
        - cache_cutoff_hours (int):
            The maximum age of a cache file (in hours) to be considered valid.
            If no file is fresh enough, fresh data will be fetched.

    Returns:
        - pd.DataFrame:
            The DataFrame read from the newest valid cache file, or the DataFrame returned by `fetch_func`.
            Cached data has been round-tripped through CSV (written with `index=False`).
    """

    # Ensure the directory exists
    os.makedirs(cache_dir, exist_ok=True)

    # One reference time is used both for the age check and for naming a new cache file
    now: datetime = datetime.now()

    timestamp_format: str = '%Y%m%d%H%M%S'
    name_head: str = f"{file_prefix}_"
    name_tail: str = ".csv"

    latest_cache_filename: Optional[str] = None
    latest_cache_time: Optional[datetime] = None

    # Find the newest cache file for this exact prefix
    for filename in os.listdir(cache_dir):
        if not (filename.startswith(name_head) and filename.endswith(name_tail)):
            continue

        # Whatever sits between "<prefix>_" and ".csv" must be the timestamp
        timestamp_str: str = filename[len(name_head):-len(name_tail)]
        try:
            file_time: datetime = datetime.strptime(timestamp_str, timestamp_format)
        except ValueError:
            # Not one of our cache files (e.g. a longer prefix or a hand-made file)
            continue

        if latest_cache_time is None or file_time > latest_cache_time:
            latest_cache_time = file_time
            latest_cache_filename = filename

    # If a valid cache exists and is within the cutoff time, read from it
    if latest_cache_time is not None and now - latest_cache_time < timedelta(hours=cache_cutoff_hours):
        return pd.read_csv(os.path.join(cache_dir, latest_cache_filename))

    # Fetch new data via the provided function
    df: pd.DataFrame = fetch_func()

    # Save the new data in a cache file. Write under a temporary name first and
    # rename afterwards, so an interrupted write can never leave a truncated
    # "<prefix>_<timestamp>.csv" behind that later calls would trust as a cache.
    cache_filename: str = f"{name_head}{now.strftime(timestamp_format)}{name_tail}"
    cache_path: str = os.path.join(cache_dir, cache_filename)
    temp_path: str = f"{cache_path}.tmp"
    try:
        df.to_csv(temp_path, index=False)
        os.replace(temp_path, cache_path)
    except Exception:
        # Remove the partial file, then surface the original error unchanged
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise

    return df
|
2072
|
+
|
{rgwfuncs-0.0.63.dist-info → rgwfuncs-0.0.65.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.63
|
3
|
+
Version: 0.0.65
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -1713,7 +1713,34 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
|
|
1713
1713
|
|
1714
1714
|
--------------------------------------------------------------------------------
|
1715
1715
|
|
1716
|
+
### 46. `load_fresh_data_or_pull_from_cache`
|
1717
|
+
Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn't exist, it uses a fetching function to get new data, which it caches and returns.
|
1716
1718
|
|
1719
|
+
• Parameters:
|
1720
|
+
- `fetch_func` (typing.Callable[[], pd.DataFrame]): A callable function that fetches fresh data and returns it as a pandas DataFrame.
|
1721
|
+
- `cache_dir` (str): The directory where cache files are stored.
|
1722
|
+
- `file_prefix` (str): The prefix used for cache filenames to identify relevant cache files.
|
1723
|
+
- `cache_cutoff_hours` (int): The age in hours beyond which a cache file is considered obsolete.
|
1724
|
+
|
1725
|
+
• Returns:
|
1726
|
+
- `pd.DataFrame`: The DataFrame containing cached or freshly fetched data.
|
1727
|
+
|
1728
|
+
• Example:
|
1729
|
+
|
1730
|
+
from rgwfuncs import load_fresh_data_or_pull_from_cache
|
1731
|
+
import pandas as pd
|
1732
|
+
|
1733
|
+
def fetch_data():
|
1734
|
+
# This is your data-fetching logic. Replace with real fetching code.
|
1735
|
+
return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': [4, 5, 6]})
|
1736
|
+
|
1737
|
+
cache_dir = 'cache_directory'
|
1738
|
+
file_prefix = 'cached_data'
|
1739
|
+
cache_cutoff_hours = 24
|
1740
|
+
|
1741
|
+
df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours)
|
1742
|
+
|
1743
|
+
--------------------------------------------------------------------------------
|
1717
1744
|
|
1718
1745
|
## Additional Info
|
1719
1746
|
|
rgwfuncs-0.0.65.dist-info/RECORD
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
|
+
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
+
rgwfuncs/df_lib.py,sha256=FdyGzXxBJUImJYfa0oYiqAfbF581180w-KspG8--pBc,70895
|
4
|
+
rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
|
5
|
+
rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
|
6
|
+
rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
|
7
|
+
rgwfuncs-0.0.65.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.65.dist-info/METADATA,sha256=pI0mJoVRg7f6vm3S2Fm3KI_-KoBlQvJqp06kvAOF-Ic,60288
|
9
|
+
rgwfuncs-0.0.65.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
+
rgwfuncs-0.0.65.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.65.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.65.dist-info/RECORD,,
|
rgwfuncs-0.0.63.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=-rcdj4_9zq82h0Tl00S9GvEqDYh7yhPCNhnhBs3mZCg,1676
|
2
|
-
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=eGEMVv-vRRtH-tGDpBK6M73QOLz5GIGcqUl8UuNyFw8,68158
|
4
|
-
rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
|
5
|
-
rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
|
6
|
-
rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
|
7
|
-
rgwfuncs-0.0.63.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
-
rgwfuncs-0.0.63.dist-info/METADATA,sha256=Cb9W5hAQLMD8nTRWNZ1GCOSesld1R1Bo4BIw0RHjlek,58951
|
9
|
-
rgwfuncs-0.0.63.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
-
rgwfuncs-0.0.63.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
-
rgwfuncs-0.0.63.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
-
rgwfuncs-0.0.63.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|