rgwfuncs 0.0.93__py3-none-any.whl → 0.0.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/df_lib.py CHANGED
@@ -2085,7 +2085,7 @@ def sync_dataframe_to_sqlite_database(
2085
2085
  conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
2086
2086
 
2087
2087
 
2088
- def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
2088
+ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int, dtype: dict = None) -> pd.DataFrame:
2089
2089
  """
2090
2090
  Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
2091
2091
 
@@ -2103,16 +2103,18 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2103
2103
  - cache_cutoff_hours (int):
2104
2104
  The maximum age of a cache file (in hours) to be considered valid.
2105
2105
  If no file is fresh enough, fresh data will be fetched.
2106
+ - dtype (dict, optional):
2107
+ A dictionary specifying the data types for columns when reading the CSV cache file.
2108
+ Passed to pd.read_csv() to handle mixed-type columns explicitly. Defaults to None.
2106
2109
 
2107
2110
  Returns:
2108
2111
  - pd.DataFrame:
2109
2112
  The pandas DataFrame containing either cached or freshly fetched data.
2110
2113
  """
2111
-
2112
2114
  # Ensure the directory exists
2113
2115
  os.makedirs(cache_dir, exist_ok=True)
2114
2116
 
2115
- # Generate the current timestamp in the required format
2117
+ # Generate the current timestamp
2116
2118
  now: datetime = datetime.now()
2117
2119
 
2118
2120
  # Initialize cache file details
@@ -2133,7 +2135,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2133
2135
 
2134
2136
  # If a valid cache exists and is within the cutoff time, read from it
2135
2137
  if latest_cache_time and now - latest_cache_time < timedelta(hours=cache_cutoff_hours):
2136
- df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename))
2138
+ df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename), dtype=dtype)
2137
2139
  else:
2138
2140
  # Fetch new data via the provided function
2139
2141
  df = fetch_func()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rgwfuncs
3
- Version: 0.0.93
3
+ Version: 0.0.94
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1734,32 +1734,39 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
1734
1734
  --------------------------------------------------------------------------------
1735
1735
 
1736
1736
  ### 46. `load_fresh_data_or_pull_from_cache`
1737
- Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn't exist, it uses a fetching function to get new data, which it caches and returns.
1738
1737
 
1739
- Parameters:
1740
- - `fetch_func` (typing.Callable[[], pd.DataFrame]): A callable function that fetches fresh data and returns it as a pandas DataFrame.
1741
- - `cache_dir` (str): The directory where cache files are stored.
1742
- - `file_prefix` (str): The prefix used for cache filenames to identify relevant cache files.
1743
- - `cache_cutoff_hours` (int): The age in hours beyond which a cache file is considered obsolete.
1738
+ Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn’t exist, it uses a fetching function to get new data, which it caches and returns. An optional `dtype` parameter allows specifying column data types when reading from the cache, preventing issues with mixed-type columns.
1744
1739
 
1745
- Returns:
1746
- - `pd.DataFrame`: The DataFrame containing cached or freshly fetched data.
1740
+ #### Parameters:
1741
+ - **`fetch_func` (typing.Callable[[], pd.DataFrame])**: A callable function that fetches fresh data and returns it as a pandas DataFrame.
1742
+ - **`cache_dir` (str)**: The directory where cache files are stored.
1743
+ - **`file_prefix` (str)**: The prefix used for cache filenames to identify relevant cache files.
1744
+ - **`cache_cutoff_hours` (int)**: The age in hours beyond which a cache file is considered obsolete.
1745
+ - **`dtype` (dict, optional)**: A dictionary specifying the data types for columns when reading the CSV cache file. Passed to `pd.read_csv()` to handle mixed-type columns explicitly. Defaults to `None`, in which case pandas infers the types.
1747
1746
 
1748
- Example:
1747
+ #### Returns:
1748
+ - **`pd.DataFrame`**: The DataFrame containing cached or freshly fetched data.
1749
+
1750
+ #### Example:
1749
1751
 
1750
1752
  from rgwfuncs import load_fresh_data_or_pull_from_cache
1751
1753
  import pandas as pd
1752
1754
 
1753
1755
  def fetch_data():
1754
1756
  # This is your data-fetching logic. Replace with real fetching code.
1755
- return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': [4, 5, 6]})
1757
+ return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': ['4', '5', '6']})
1756
1758
 
1757
1759
  cache_dir = 'cache_directory'
1758
1760
  file_prefix = 'cached_data'
1759
1761
  cache_cutoff_hours = 24
1760
1762
 
1763
+ # Without dtype (pandas infers types)
1761
1764
  df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours)
1762
1765
 
1766
+ # With dtype to handle mixed types
1767
+ df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours, dtype={'Column2': str})
1768
+ print(df)
1769
+
1763
1770
  --------------------------------------------------------------------------------
1764
1771
 
1765
1772
  ## Additional Info
@@ -1,12 +1,12 @@
1
1
  rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
2
2
  rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=LHG6E-umLGVdDRWjziFrRb_YSlTronHv2QwEFBrTAt4,75528
3
+ rgwfuncs/df_lib.py,sha256=SUEjUc8kCELtbQE2luMsBGh18aTWS97Wb5s3RdMcmHc,75750
4
4
  rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
5
5
  rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
6
6
  rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
7
- rgwfuncs-0.0.93.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.93.dist-info/METADATA,sha256=eEiNMD4k_feeK6mxhrVqlvDTRONwsPosvR-pj9hYlAI,61443
9
- rgwfuncs-0.0.93.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
- rgwfuncs-0.0.93.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.93.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.93.dist-info/RECORD,,
7
+ rgwfuncs-0.0.94.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.94.dist-info/METADATA,sha256=K0ehKuNHmsn7IbtRWM8o7_323F8RUnYNtry2QHosFWo,62066
9
+ rgwfuncs-0.0.94.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
+ rgwfuncs-0.0.94.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.94.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.94.dist-info/RECORD,,