rgwfuncs-0.0.65-py3-none-any.whl → rgwfuncs-0.0.67-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +14 -9
- {rgwfuncs-0.0.65.dist-info → rgwfuncs-0.0.67.dist-info}/METADATA +1 -1
- {rgwfuncs-0.0.65.dist-info → rgwfuncs-0.0.67.dist-info}/RECORD +7 -7
- {rgwfuncs-0.0.65.dist-info → rgwfuncs-0.0.67.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.65.dist-info → rgwfuncs-0.0.67.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.65.dist-info → rgwfuncs-0.0.67.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.65.dist-info → rgwfuncs-0.0.67.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
|
|
2
2
|
import pymssql
|
3
3
|
import os
|
4
4
|
import json
|
5
|
-
from datetime import datetime
|
5
|
+
from datetime import datetime, timedelta
|
6
6
|
import time
|
7
7
|
import gc
|
8
8
|
import mysql.connector
|
@@ -509,6 +509,10 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
|
|
509
509
|
# Ensure the file path is absolute
|
510
510
|
file_path = os.path.abspath(file_path)
|
511
511
|
|
512
|
+
# Check if the file exists
|
513
|
+
if not os.path.isfile(file_path):
|
514
|
+
raise ValueError(f"File not found: {file_path}")
|
515
|
+
|
512
516
|
# Determine file type by extension
|
513
517
|
file_extension = file_path.split('.')[-1].lower()
|
514
518
|
|
@@ -518,6 +522,8 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
|
|
518
522
|
df.replace('', None, inplace=True)
|
519
523
|
elif file_extension in ['xls', 'xlsx']:
|
520
524
|
df = pd.read_excel(file_path)
|
525
|
+
elif file_extension == 'ods':
|
526
|
+
df = pd.read_excel(file_path, engine='odf')
|
521
527
|
elif file_extension == 'json':
|
522
528
|
df = pd.read_json(file_path)
|
523
529
|
elif file_extension == 'parquet':
|
@@ -2013,24 +2019,24 @@ def sync_dataframe_to_sqlite_database(
|
|
2013
2019
|
def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
|
2014
2020
|
"""
|
2015
2021
|
Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
|
2016
|
-
|
2022
|
+
|
2017
2023
|
This function checks a specified directory for the most recent cache file matching a specified prefix.
|
2018
2024
|
If a recent cache file (within the cutoff time in hours) is found, the data is read from there.
|
2019
2025
|
Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file, and returns it.
|
2020
2026
|
|
2021
2027
|
Parameters:
|
2022
|
-
- fetch_func (typing.Callable[[], pd.DataFrame]):
|
2028
|
+
- fetch_func (typing.Callable[[], pd.DataFrame]):
|
2023
2029
|
A callable function that, when executed, returns a pandas DataFrame with fresh data.
|
2024
|
-
- cache_dir (str):
|
2030
|
+
- cache_dir (str):
|
2025
2031
|
The directory where cache files are stored.
|
2026
|
-
- file_prefix (str):
|
2032
|
+
- file_prefix (str):
|
2027
2033
|
The prefix used for cache filenames to identify relevant cache files.
|
2028
|
-
- cache_cutoff_hours (int):
|
2034
|
+
- cache_cutoff_hours (int):
|
2029
2035
|
The maximum age of a cache file (in hours) to be considered valid.
|
2030
2036
|
If no file is fresh enough, fresh data will be fetched.
|
2031
2037
|
|
2032
2038
|
Returns:
|
2033
|
-
- pd.DataFrame:
|
2039
|
+
- pd.DataFrame:
|
2034
2040
|
The pandas DataFrame containing either cached or freshly fetched data.
|
2035
2041
|
"""
|
2036
2042
|
|
@@ -2047,7 +2053,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2047
2053
|
# Retrieve the latest cache file if it exists
|
2048
2054
|
for filename in os.listdir(cache_dir):
|
2049
2055
|
if filename.startswith(file_prefix) and filename.endswith(".csv"):
|
2050
|
-
timestamp_str: str = filename[len(file_prefix)+1:].replace('.csv', '')
|
2056
|
+
timestamp_str: str = filename[len(file_prefix) + 1:].replace('.csv', '')
|
2051
2057
|
try:
|
2052
2058
|
file_time: datetime = datetime.strptime(timestamp_str, '%Y%m%d%H%M%S')
|
2053
2059
|
if latest_cache_time is None or file_time > latest_cache_time:
|
@@ -2069,4 +2075,3 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2069
2075
|
df.to_csv(os.path.join(cache_dir, cache_filename), index=False)
|
2070
2076
|
|
2071
2077
|
return df
|
2072
|
-
|
@@ -1,12 +1,12 @@
|
|
1
1
|
rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
|
2
2
|
rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
|
3
|
-
rgwfuncs/df_lib.py,sha256=<hash truncated in extraction>,<size truncated>
|
3
|
+
rgwfuncs/df_lib.py,sha256=r6T-MwyDq9NAPW1Xf6NzSy7ZFicIKdemR-UKu6TZt5g,71111
|
4
4
|
rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
|
5
5
|
rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
|
6
6
|
rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
|
7
|
-
rgwfuncs-0.0.65.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
-
rgwfuncs-0.0.65.dist-info/METADATA,sha256=<hash truncated in extraction>,<size truncated>
|
9
|
-
rgwfuncs-0.0.65.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
-
rgwfuncs-0.0.65.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
-
rgwfuncs-0.0.65.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
-
rgwfuncs-0.0.65.dist-info/RECORD,,
|
7
|
+
rgwfuncs-0.0.67.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
|
8
|
+
rgwfuncs-0.0.67.dist-info/METADATA,sha256=N1PBQQAjEWCY0f4Fi3O0u5MqRCSKlCtGAnLO9WdK51k,60288
|
9
|
+
rgwfuncs-0.0.67.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
+
rgwfuncs-0.0.67.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
11
|
+
rgwfuncs-0.0.67.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
12
|
+
rgwfuncs-0.0.67.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|