rgwfuncs 0.0.65__tar.gz → 0.0.67__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {rgwfuncs-0.0.65/src/rgwfuncs.egg-info → rgwfuncs-0.0.67}/PKG-INFO +1 -1
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/pyproject.toml +1 -1
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/setup.cfg +1 -1
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs/df_lib.py +14 -9
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67/src/rgwfuncs.egg-info}/PKG-INFO +1 -1
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/LICENSE +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/README.md +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs/__init__.py +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs/algebra_lib.py +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs/docs_lib.py +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs/interactive_shell_lib.py +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs/str_lib.py +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs.egg-info/SOURCES.txt +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs.egg-info/dependency_links.txt +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs.egg-info/entry_points.txt +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs.egg-info/requires.txt +0 -0
- {rgwfuncs-0.0.65 → rgwfuncs-0.0.67}/src/rgwfuncs.egg-info/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import pandas as pd
|
|
2
2
|
import pymssql
|
3
3
|
import os
|
4
4
|
import json
|
5
|
-
from datetime import datetime
|
5
|
+
from datetime import datetime, timedelta
|
6
6
|
import time
|
7
7
|
import gc
|
8
8
|
import mysql.connector
|
@@ -509,6 +509,10 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
|
|
509
509
|
# Ensure the file path is absolute
|
510
510
|
file_path = os.path.abspath(file_path)
|
511
511
|
|
512
|
+
# Check if the file exists
|
513
|
+
if not os.path.isfile(file_path):
|
514
|
+
raise ValueError(f"File not found: {file_path}")
|
515
|
+
|
512
516
|
# Determine file type by extension
|
513
517
|
file_extension = file_path.split('.')[-1].lower()
|
514
518
|
|
@@ -518,6 +522,8 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
|
|
518
522
|
df.replace('', None, inplace=True)
|
519
523
|
elif file_extension in ['xls', 'xlsx']:
|
520
524
|
df = pd.read_excel(file_path)
|
525
|
+
elif file_extension == 'ods':
|
526
|
+
df = pd.read_excel(file_path, engine='odf')
|
521
527
|
elif file_extension == 'json':
|
522
528
|
df = pd.read_json(file_path)
|
523
529
|
elif file_extension == 'parquet':
|
@@ -2013,24 +2019,24 @@ def sync_dataframe_to_sqlite_database(
|
|
2013
2019
|
def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
|
2014
2020
|
"""
|
2015
2021
|
Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
|
2016
|
-
|
2022
|
+
|
2017
2023
|
This function checks a specified directory for the most recent cache file matching a specified prefix.
|
2018
2024
|
If a recent cache file (within the cutoff time in hours) is found, the data is read from there.
|
2019
2025
|
Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file, and returns it.
|
2020
2026
|
|
2021
2027
|
Parameters:
|
2022
|
-
- fetch_func (typing.Callable[[], pd.DataFrame]):
|
2028
|
+
- fetch_func (typing.Callable[[], pd.DataFrame]):
|
2023
2029
|
A callable function that, when executed, returns a pandas DataFrame with fresh data.
|
2024
|
-
- cache_dir (str):
|
2030
|
+
- cache_dir (str):
|
2025
2031
|
The directory where cache files are stored.
|
2026
|
-
- file_prefix (str):
|
2032
|
+
- file_prefix (str):
|
2027
2033
|
The prefix used for cache filenames to identify relevant cache files.
|
2028
|
-
- cache_cutoff_hours (int):
|
2034
|
+
- cache_cutoff_hours (int):
|
2029
2035
|
The maximum age of a cache file (in hours) to be considered valid.
|
2030
2036
|
If no file is fresh enough, fresh data will be fetched.
|
2031
2037
|
|
2032
2038
|
Returns:
|
2033
|
-
- pd.DataFrame:
|
2039
|
+
- pd.DataFrame:
|
2034
2040
|
The pandas DataFrame containing either cached or freshly fetched data.
|
2035
2041
|
"""
|
2036
2042
|
|
@@ -2047,7 +2053,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2047
2053
|
# Retrieve the latest cache file if it exists
|
2048
2054
|
for filename in os.listdir(cache_dir):
|
2049
2055
|
if filename.startswith(file_prefix) and filename.endswith(".csv"):
|
2050
|
-
timestamp_str: str = filename[len(file_prefix)+1:].replace('.csv', '')
|
2056
|
+
timestamp_str: str = filename[len(file_prefix) + 1:].replace('.csv', '')
|
2051
2057
|
try:
|
2052
2058
|
file_time: datetime = datetime.strptime(timestamp_str, '%Y%m%d%H%M%S')
|
2053
2059
|
if latest_cache_time is None or file_time > latest_cache_time:
|
@@ -2069,4 +2075,3 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
|
|
2069
2075
|
df.to_csv(os.path.join(cache_dir, cache_filename), index=False)
|
2070
2076
|
|
2071
2077
|
return df
|
2072
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|