rgwfuncs 0.0.93__py3-none-any.whl → 0.0.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +6 -4
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.94.dist-info}/METADATA +18 -11
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.94.dist-info}/RECORD +7 -7
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.94.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.94.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.94.dist-info}/licenses/LICENSE +0 -0
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.94.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -2085,7 +2085,7 @@ def sync_dataframe_to_sqlite_database(
|
|
2085
2085
|
conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
|
2086
2086
|
|
2087
2087
|
|
2088
|
-
def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
|
2088
|
+
def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int, dtype: dict = None) -> pd.DataFrame:
|
2089
2089
|
"""
|
2090
2090
|
Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
|
2091
2091
|
|
@@ -2103,16 +2103,18 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2103
2103
|
- cache_cutoff_hours (int):
|
2104
2104
|
The maximum age of a cache file (in hours) to be considered valid.
|
2105
2105
|
If no file is fresh enough, fresh data will be fetched.
|
2106
|
+
- dtype (dict, optional):
|
2107
|
+
A dictionary specifying the data types for columns when reading the CSV cache file.
|
2108
|
+
Passed to pd.read_csv() to handle mixed-type columns explicitly. Defaults to None.
|
2106
2109
|
|
2107
2110
|
Returns:
|
2108
2111
|
- pd.DataFrame:
|
2109
2112
|
The pandas DataFrame containing either cached or freshly fetched data.
|
2110
2113
|
"""
|
2111
|
-
|
2112
2114
|
# Ensure the directory exists
|
2113
2115
|
os.makedirs(cache_dir, exist_ok=True)
|
2114
2116
|
|
2115
|
-
# Generate the current timestamp
|
2117
|
+
# Generate the current timestamp
|
2116
2118
|
now: datetime = datetime.now()
|
2117
2119
|
|
2118
2120
|
# Initialize cache file details
|
@@ -2133,7 +2135,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2133
2135
|
|
2134
2136
|
# If a valid cache exists and is within the cutoff time, read from it
|
2135
2137
|
if latest_cache_time and now - latest_cache_time < timedelta(hours=cache_cutoff_hours):
|
2136
|
-
df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename))
|
2138
|
+
df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename), dtype=dtype)
|
2137
2139
|
else:
|
2138
2140
|
# Fetch new data via the provided function
|
2139
2141
|
df = fetch_func()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.93
|
3
|
+
Version: 0.0.94
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -1734,32 +1734,39 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
|
|
1734
1734
|
--------------------------------------------------------------------------------
|
1735
1735
|
|
1736
1736
|
### 46. `load_fresh_data_or_pull_from_cache`
|
1737
|
-
Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn't exist, it uses a fetching function to get new data, which it caches and returns.
|
1738
1737
|
|
1739
|
-
|
1740
|
-
- `fetch_func` (typing.Callable[[], pd.DataFrame]): A callable function that fetches fresh data and returns it as a pandas DataFrame.
|
1741
|
-
- `cache_dir` (str): The directory where cache files are stored.
|
1742
|
-
- `file_prefix` (str): The prefix used for cache filenames to identify relevant cache files.
|
1743
|
-
- `cache_cutoff_hours` (int): The age in hours beyond which a cache file is considered obsolete.
|
1738
|
+
Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn’t exist, it uses a fetching function to get new data, which it caches and returns. An optional `dtype` parameter allows specifying column data types when reading from the cache, preventing issues with mixed-type columns.
|
1744
1739
|
|
1745
|
-
|
1746
|
-
|
1740
|
+
#### Parameters:
|
1741
|
+
- **`fetch_func` (typing.Callable[[], pd.DataFrame])**: A callable function that fetches fresh data and returns it as a pandas DataFrame.
|
1742
|
+
- **`cache_dir` (str)**: The directory where cache files are stored.
|
1743
|
+
- **`file_prefix` (str)**: The prefix used for cache filenames to identify relevant cache files.
|
1744
|
+
- **`cache_cutoff_hours` (int)**: The age in hours beyond which a cache file is considered obsolete.
|
1745
|
+
- **`dtype` (dict, optional)**: A dictionary specifying the data types for columns when reading the CSV cache file. Passed to `pd.read_csv()` to handle mixed-type columns explicitly. Defaults to `None`, in which case pandas infers the types.
|
1747
1746
|
|
1748
|
-
|
1747
|
+
#### Returns:
|
1748
|
+
- **`pd.DataFrame`**: The DataFrame containing either cached or freshly fetched data.
|
1749
|
+
|
1750
|
+
#### Example:
|
1749
1751
|
|
1750
1752
|
from rgwfuncs import load_fresh_data_or_pull_from_cache
|
1751
1753
|
import pandas as pd
|
1752
1754
|
|
1753
1755
|
def fetch_data():
|
1754
1756
|
# This is your data-fetching logic. Replace with real fetching code.
|
1755
|
-
return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': [4, 5, 6]})
|
1757
|
+
return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': ['4', '5', '6']})
|
1756
1758
|
|
1757
1759
|
cache_dir = 'cache_directory'
|
1758
1760
|
file_prefix = 'cached_data'
|
1759
1761
|
cache_cutoff_hours = 24
|
1760
1762
|
|
1763
|
+
# Without dtype (pandas infers types)
|
1761
1764
|
df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours)
|
1762
1765
|
|
1766
|
+
# With dtype to handle mixed types
|
1767
|
+
df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours, dtype={'Column2': str})
|
1768
|
+
print(df)
|
1769
|
+
|
1763
1770
|
--------------------------------------------------------------------------------
|
1764
1771
|
|
1765
1772
|
## Additional Info
|
@@ -1,12 +1,12 @@
|
|
1
1
|
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
2
|
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=
|
3
|
+
rgwfuncs/df_lib.py,sha256=SUEjUc8kCELtbQE2luMsBGh18aTWS97Wb5s3RdMcmHc,75750
|
4
4
|
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
5
5
|
rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
|
6
6
|
rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
|
7
|
-
rgwfuncs-0.0.
|
8
|
-
rgwfuncs-0.0.
|
9
|
-
rgwfuncs-0.0.
|
10
|
-
rgwfuncs-0.0.
|
11
|
-
rgwfuncs-0.0.
|
12
|
-
rgwfuncs-0.0.
|
7
|
+
rgwfuncs-0.0.94.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.94.dist-info/METADATA,sha256=K0ehKuNHmsn7IbtRWM8o7_323F8RUnYNtry2QHosFWo,62066
|
9
|
+
rgwfuncs-0.0.94.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
10
|
+
rgwfuncs-0.0.94.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.94.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.94.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|