rgwfuncs 0.0.93__py3-none-any.whl → 0.0.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/df_lib.py CHANGED
@@ -2085,13 +2085,14 @@ def sync_dataframe_to_sqlite_database(
2085
2085
  conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
2086
2086
 
2087
2087
 
2088
- def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
2088
+ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int, dtype: dict = None) -> pd.DataFrame:
2089
2089
  """
2090
- Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
2090
+ Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, remove older cache files, and return it.
2091
2091
 
2092
2092
  This function checks a specified directory for the most recent cache file matching a specified prefix.
2093
2093
  If a recent cache file (within the cutoff time in hours) is found, the data is read from there.
2094
- Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file, and returns it.
2094
+ Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file,
2095
+ removes all earlier cache files with the same prefix, and returns the data.
2095
2096
 
2096
2097
  Parameters:
2097
2098
  - fetch_func (typing.Callable[[], pd.DataFrame]):
@@ -2103,16 +2104,18 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2103
2104
  - cache_cutoff_hours (int):
2104
2105
  The maximum age of a cache file (in hours) to be considered valid.
2105
2106
  If no file is fresh enough, fresh data will be fetched.
2107
+ - dtype (dict, optional):
2108
+ A dictionary specifying the data types for columns when reading the CSV cache file.
2109
+ Passed to pd.read_csv() to handle mixed-type columns explicitly. Defaults to None.
2106
2110
 
2107
2111
  Returns:
2108
2112
  - pd.DataFrame:
2109
2113
  The pandas DataFrame containing either cached or freshly fetched data.
2110
2114
  """
2111
-
2112
2115
  # Ensure the directory exists
2113
2116
  os.makedirs(cache_dir, exist_ok=True)
2114
2117
 
2115
- # Generate the current timestamp in the required format
2118
+ # Generate the current timestamp
2116
2119
  now: datetime = datetime.now()
2117
2120
 
2118
2121
  # Initialize cache file details
@@ -2133,7 +2136,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2133
2136
 
2134
2137
  # If a valid cache exists and is within the cutoff time, read from it
2135
2138
  if latest_cache_time and now - latest_cache_time < timedelta(hours=cache_cutoff_hours):
2136
- df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename))
2139
+ df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename), dtype=dtype)
2137
2140
  else:
2138
2141
  # Fetch new data via the provided function
2139
2142
  df = fetch_func()
@@ -2143,4 +2146,12 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2143
2146
  cache_filename: str = f"{file_prefix}_{current_time_str}.csv"
2144
2147
  df.to_csv(os.path.join(cache_dir, cache_filename), index=False)
2145
2148
 
2149
+ # Remove all earlier cache files with the same prefix
2150
+ for filename in os.listdir(cache_dir):
2151
+ if filename.startswith(file_prefix) and filename.endswith(".csv") and filename != cache_filename:
2152
+ try:
2153
+ os.remove(os.path.join(cache_dir, filename))
2154
+ except OSError:
2155
+ continue
2156
+
2146
2157
  return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rgwfuncs
3
- Version: 0.0.93
3
+ Version: 0.0.95
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1734,32 +1734,39 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
1734
1734
  --------------------------------------------------------------------------------
1735
1735
 
1736
1736
  ### 46. `load_fresh_data_or_pull_from_cache`
1737
- Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn't exist, it uses a fetching function to get new data, which it caches and returns.
1738
1737
 
1739
- Parameters:
1740
- - `fetch_func` (typing.Callable[[], pd.DataFrame]): A callable function that fetches fresh data and returns it as a pandas DataFrame.
1741
- - `cache_dir` (str): The directory where cache files are stored.
1742
- - `file_prefix` (str): The prefix used for cache filenames to identify relevant cache files.
1743
- - `cache_cutoff_hours` (int): The age in hours beyond which a cache file is considered obsolete.
1738
+ Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, removes older cache files, and returns it. If the cache is too old or doesn’t exist, it uses a fetching function to get new data, which it caches and returns. When fresh data is fetched and saved, all earlier cache files with the same prefix are deleted to keep the cache directory clean. An optional `dtype` parameter allows specifying column data types when reading from the cache, preventing issues with mixed-type columns.
1744
1739
 
1745
- Returns:
1746
- - `pd.DataFrame`: The DataFrame containing cached or freshly fetched data.
1740
+ #### Parameters:
1741
+ - **`fetch_func` (typing.Callable[[], pd.DataFrame])**: A callable function that fetches fresh data and returns it as a pandas DataFrame.
1742
+ - **`cache_dir` (str)**: The directory where cache files are stored.
1743
+ - **`file_prefix` (str)**: The prefix used for cache filenames to identify relevant cache files.
1744
+ - **`cache_cutoff_hours` (int)**: The age in hours beyond which a cache file is considered obsolete.
1745
+ - **`dtype` (dict, optional)**: A dictionary specifying the data types for columns when reading the CSV cache file. Passed to `pd.read_csv()` to handle mixed-type columns explicitly. Defaults to `None`, in which case pandas infers the types.
1747
1746
 
1748
- Example:
1747
+ #### Returns:
1748
+ - **`pd.DataFrame`**: The DataFrame containing cached or freshly fetched data.
1749
+
1750
+ #### Example:
1749
1751
 
1750
1752
  from rgwfuncs import load_fresh_data_or_pull_from_cache
1751
1753
  import pandas as pd
1752
1754
 
1753
1755
  def fetch_data():
1754
1756
  # This is your data-fetching logic. Replace with real fetching code.
1755
- return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': [4, 5, 6]})
1757
+ return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': ['4', '5', '6']})
1756
1758
 
1757
1759
  cache_dir = 'cache_directory'
1758
1760
  file_prefix = 'cached_data'
1759
1761
  cache_cutoff_hours = 24
1760
1762
 
1763
+ # Without dtype (pandas infers types)
1761
1764
  df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours)
1762
1765
 
1766
+ # With dtype to handle mixed types
1767
+ df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours, dtype={'Column2': str})
1768
+ print(df)
1769
+
1763
1770
  --------------------------------------------------------------------------------
1764
1771
 
1765
1772
  ## Additional Info
@@ -1,12 +1,12 @@
1
1
  rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
2
2
  rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=LHG6E-umLGVdDRWjziFrRb_YSlTronHv2QwEFBrTAt4,75528
3
+ rgwfuncs/df_lib.py,sha256=BoBICtf_uy1AOuduGenJemtTTpF4alalSq0Nuyy88F4,76207
4
4
  rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
5
5
  rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
6
6
  rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
7
- rgwfuncs-0.0.93.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.93.dist-info/METADATA,sha256=eEiNMD4k_feeK6mxhrVqlvDTRONwsPosvR-pj9hYlAI,61443
9
- rgwfuncs-0.0.93.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
- rgwfuncs-0.0.93.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.93.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.93.dist-info/RECORD,,
7
+ rgwfuncs-0.0.95.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.95.dist-info/METADATA,sha256=TPX6a5GjVlF_KIUytC2J_bi5nILxBSp9ArUoMuwRC8s,62223
9
+ rgwfuncs-0.0.95.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
+ rgwfuncs-0.0.95.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.95.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.95.dist-info/RECORD,,