rgwfuncs 0.0.93__py3-none-any.whl → 0.0.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +17 -6
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.95.dist-info}/METADATA +18 -11
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.95.dist-info}/RECORD +7 -7
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.95.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.95.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.95.dist-info}/licenses/LICENSE +0 -0
- {rgwfuncs-0.0.93.dist-info → rgwfuncs-0.0.95.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -2085,13 +2085,14 @@ def sync_dataframe_to_sqlite_database(
|
|
2085
2085
|
conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
|
2086
2086
|
|
2087
2087
|
|
2088
|
-
def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
|
2088
|
+
def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int, dtype: dict = None) -> pd.DataFrame:
|
2089
2089
|
"""
|
2090
|
-
Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
|
2090
|
+
Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, remove older cache files, and return it.
|
2091
2091
|
|
2092
2092
|
This function checks a specified directory for the most recent cache file matching a specified prefix.
|
2093
2093
|
If a recent cache file (within the cutoff time in hours) is found, the data is read from there.
|
2094
|
-
Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file,
|
2094
|
+
Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file,
|
2095
|
+
removes all earlier cache files with the same prefix, and returns the data.
|
2095
2096
|
|
2096
2097
|
Parameters:
|
2097
2098
|
- fetch_func (typing.Callable[[], pd.DataFrame]):
|
@@ -2103,16 +2104,18 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2103
2104
|
- cache_cutoff_hours (int):
|
2104
2105
|
The maximum age of a cache file (in hours) to be considered valid.
|
2105
2106
|
If no file is fresh enough, fresh data will be fetched.
|
2107
|
+
- dtype (dict, optional):
|
2108
|
+
A dictionary specifying the data types for columns when reading the CSV cache file.
|
2109
|
+
Passed to pd.read_csv() to handle mixed-type columns explicitly. Defaults to None.
|
2106
2110
|
|
2107
2111
|
Returns:
|
2108
2112
|
- pd.DataFrame:
|
2109
2113
|
The pandas DataFrame containing either cached or freshly fetched data.
|
2110
2114
|
"""
|
2111
|
-
|
2112
2115
|
# Ensure the directory exists
|
2113
2116
|
os.makedirs(cache_dir, exist_ok=True)
|
2114
2117
|
|
2115
|
-
# Generate the current timestamp
|
2118
|
+
# Generate the current timestamp
|
2116
2119
|
now: datetime = datetime.now()
|
2117
2120
|
|
2118
2121
|
# Initialize cache file details
|
@@ -2133,7 +2136,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2133
2136
|
|
2134
2137
|
# If a valid cache exists and is within the cutoff time, read from it
|
2135
2138
|
if latest_cache_time and now - latest_cache_time < timedelta(hours=cache_cutoff_hours):
|
2136
|
-
df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename))
|
2139
|
+
df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename), dtype=dtype)
|
2137
2140
|
else:
|
2138
2141
|
# Fetch new data via the provided function
|
2139
2142
|
df = fetch_func()
|
@@ -2143,4 +2146,12 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2143
2146
|
cache_filename: str = f"{file_prefix}_{current_time_str}.csv"
|
2144
2147
|
df.to_csv(os.path.join(cache_dir, cache_filename), index=False)
|
2145
2148
|
|
2149
|
+
# Remove all earlier cache files with the same prefix
|
2150
|
+
for filename in os.listdir(cache_dir):
|
2151
|
+
if filename.startswith(file_prefix) and filename.endswith(".csv") and filename != cache_filename:
|
2152
|
+
try:
|
2153
|
+
os.remove(os.path.join(cache_dir, filename))
|
2154
|
+
except OSError:
|
2155
|
+
continue
|
2156
|
+
|
2146
2157
|
return df
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.93
|
3
|
+
Version: 0.0.95
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -1734,32 +1734,39 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
|
|
1734
1734
|
--------------------------------------------------------------------------------
|
1735
1735
|
|
1736
1736
|
### 46. `load_fresh_data_or_pull_from_cache`
|
1737
|
-
Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn't exist, it uses a fetching function to get new data, which it caches and returns.
|
1738
1737
|
|
1739
|
-
|
1740
|
-
- `fetch_func` (typing.Callable[[], pd.DataFrame]): A callable function that fetches fresh data and returns it as a pandas DataFrame.
|
1741
|
-
- `cache_dir` (str): The directory where cache files are stored.
|
1742
|
-
- `file_prefix` (str): The prefix used for cache filenames to identify relevant cache files.
|
1743
|
-
- `cache_cutoff_hours` (int): The age in hours beyond which a cache file is considered obsolete.
|
1738
|
+
Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, removes older cache files, and returns it. If the cache is too old or doesn’t exist, it uses a fetching function to get new data, which it caches and returns. When fresh data is fetched and saved, all earlier cache files with the same prefix are deleted to keep the cache directory clean. An optional `dtype` parameter allows specifying column data types when reading from the cache, preventing issues with mixed-type columns.
|
1744
1739
|
|
1745
|
-
|
1746
|
-
|
1740
|
+
#### Parameters:
|
1741
|
+
- **`fetch_func` (typing.Callable[[], pd.DataFrame])**: A callable function that fetches fresh data and returns it as a pandas DataFrame.
|
1742
|
+
- **`cache_dir` (str)**: The directory where cache files are stored.
|
1743
|
+
- **`file_prefix` (str)**: The prefix used for cache filenames to identify relevant cache files.
|
1744
|
+
- **`cache_cutoff_hours` (int)**: The age in hours beyond which a cache file is considered obsolete.
|
1745
|
+
- **`dtype` (dict, optional)**: A dictionary specifying the data types for columns when reading the CSV cache file. Passed to `pd.read_csv()` to handle mixed-type columns explicitly. Defaults to `None`, in which case pandas infers the types.
|
1747
1746
|
|
1748
|
-
|
1747
|
+
#### Returns:
|
1748
|
+
- **`pd.DataFrame`**: The DataFrame containing cached or freshly fetched data.
|
1749
|
+
|
1750
|
+
#### Example:
|
1749
1751
|
|
1750
1752
|
from rgwfuncs import load_fresh_data_or_pull_from_cache
|
1751
1753
|
import pandas as pd
|
1752
1754
|
|
1753
1755
|
def fetch_data():
|
1754
1756
|
# This is your data-fetching logic. Replace with real fetching code.
|
1755
|
-
return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': [4, 5, 6]})
|
1757
|
+
return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': ['4', '5', '6']})
|
1756
1758
|
|
1757
1759
|
cache_dir = 'cache_directory'
|
1758
1760
|
file_prefix = 'cached_data'
|
1759
1761
|
cache_cutoff_hours = 24
|
1760
1762
|
|
1763
|
+
# Without dtype (pandas infers types)
|
1761
1764
|
df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours)
|
1762
1765
|
|
1766
|
+
# With dtype to handle mixed types
|
1767
|
+
df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours, dtype={'Column2': str})
|
1768
|
+
print(df)
|
1769
|
+
|
1763
1770
|
--------------------------------------------------------------------------------
|
1764
1771
|
|
1765
1772
|
## Additional Info
|
@@ -1,12 +1,12 @@
|
|
1
1
|
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
2
|
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=
|
3
|
+
rgwfuncs/df_lib.py,sha256=BoBICtf_uy1AOuduGenJemtTTpF4alalSq0Nuyy88F4,76207
|
4
4
|
rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
|
5
5
|
rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
|
6
6
|
rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
|
7
|
-
rgwfuncs-0.0.
|
8
|
-
rgwfuncs-0.0.
|
9
|
-
rgwfuncs-0.0.
|
10
|
-
rgwfuncs-0.0.
|
11
|
-
rgwfuncs-0.0.
|
12
|
-
rgwfuncs-0.0.
|
7
|
+
rgwfuncs-0.0.95.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.95.dist-info/METADATA,sha256=TPX6a5GjVlF_KIUytC2J_bi5nILxBSp9ArUoMuwRC8s,62223
|
9
|
+
rgwfuncs-0.0.95.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
10
|
+
rgwfuncs-0.0.95.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.95.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.95.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|