rgwfuncs 0.0.65__py3-none-any.whl → 0.0.67__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
rgwfuncs/df_lib.py CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
2
2
  import pymssql
3
3
  import os
4
4
  import json
5
- from datetime import datetime
5
+ from datetime import datetime, timedelta
6
6
  import time
7
7
  import gc
8
8
  import mysql.connector
@@ -509,6 +509,10 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
509
509
  # Ensure the file path is absolute
510
510
  file_path = os.path.abspath(file_path)
511
511
 
512
+ # Check if the file exists
513
+ if not os.path.isfile(file_path):
514
+ raise ValueError(f"File not found: {file_path}")
515
+
512
516
  # Determine file type by extension
513
517
  file_extension = file_path.split('.')[-1].lower()
514
518
 
@@ -518,6 +522,8 @@ def load_data_from_path(file_path: str) -> pd.DataFrame:
518
522
  df.replace('', None, inplace=True)
519
523
  elif file_extension in ['xls', 'xlsx']:
520
524
  df = pd.read_excel(file_path)
525
+ elif file_extension == 'ods':
526
+ df = pd.read_excel(file_path, engine='odf')
521
527
  elif file_extension == 'json':
522
528
  df = pd.read_json(file_path)
523
529
  elif file_extension == 'parquet':
@@ -2013,24 +2019,24 @@ def sync_dataframe_to_sqlite_database(
2013
2019
  def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
2014
2020
  """
2015
2021
  Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
2016
-
2022
+
2017
2023
  This function checks a specified directory for the most recent cache file matching a specified prefix.
2018
2024
  If a recent cache file (within the cutoff time in hours) is found, the data is read from there.
2019
2025
  Otherwise, it calls the data-fetching function, saves the newly fetched data to a new cache file, and returns it.
2020
2026
 
2021
2027
  Parameters:
2022
- - fetch_func (typing.Callable[[], pd.DataFrame]):
2028
+ - fetch_func (typing.Callable[[], pd.DataFrame]):
2023
2029
  A callable function that, when executed, returns a pandas DataFrame with fresh data.
2024
- - cache_dir (str):
2030
+ - cache_dir (str):
2025
2031
  The directory where cache files are stored.
2026
- - file_prefix (str):
2032
+ - file_prefix (str):
2027
2033
  The prefix used for cache filenames to identify relevant cache files.
2028
- - cache_cutoff_hours (int):
2034
+ - cache_cutoff_hours (int):
2029
2035
  The maximum age of a cache file (in hours) to be considered valid.
2030
2036
  If no file is fresh enough, fresh data will be fetched.
2031
2037
 
2032
2038
  Returns:
2033
- - pd.DataFrame:
2039
+ - pd.DataFrame:
2034
2040
  The pandas DataFrame containing either cached or freshly fetched data.
2035
2041
  """
2036
2042
 
@@ -2047,7 +2053,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2047
2053
  # Retrieve the latest cache file if it exists
2048
2054
  for filename in os.listdir(cache_dir):
2049
2055
  if filename.startswith(file_prefix) and filename.endswith(".csv"):
2050
- timestamp_str: str = filename[len(file_prefix)+1:].replace('.csv', '')
2056
+ timestamp_str: str = filename[len(file_prefix) + 1:].replace('.csv', '')
2051
2057
  try:
2052
2058
  file_time: datetime = datetime.strptime(timestamp_str, '%Y%m%d%H%M%S')
2053
2059
  if latest_cache_time is None or file_time > latest_cache_time:
@@ -2069,4 +2075,3 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
2069
2075
  df.to_csv(os.path.join(cache_dir, cache_filename), index=False)
2070
2076
 
2071
2077
  return df
2072
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.65
3
+ Version: 0.0.67
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1,12 +1,12 @@
1
1
  rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
2
2
  rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
3
- rgwfuncs/df_lib.py,sha256=FdyGzXxBJUImJYfa0oYiqAfbF581180w-KspG8--pBc,70895
3
+ rgwfuncs/df_lib.py,sha256=r6T-MwyDq9NAPW1Xf6NzSy7ZFicIKdemR-UKu6TZt5g,71111
4
4
  rgwfuncs/docs_lib.py,sha256=y3wSAOPO3qsA4HZ7xAtW8HimM8w-c8hjcEzMRLJ96ao,1960
5
5
  rgwfuncs/interactive_shell_lib.py,sha256=A7EWsYxAfDev_N0-2GjRvAtp0bAwBPHIczXb8Gu9fzI,1107
6
6
  rgwfuncs/str_lib.py,sha256=rtAdRlnSJIu3JhI-tA_A0wCiPK2m-zn5RoGpBxv_g-4,2228
7
- rgwfuncs-0.0.65.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
- rgwfuncs-0.0.65.dist-info/METADATA,sha256=pI0mJoVRg7f6vm3S2Fm3KI_-KoBlQvJqp06kvAOF-Ic,60288
9
- rgwfuncs-0.0.65.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
- rgwfuncs-0.0.65.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
- rgwfuncs-0.0.65.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
- rgwfuncs-0.0.65.dist-info/RECORD,,
7
+ rgwfuncs-0.0.67.dist-info/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
8
+ rgwfuncs-0.0.67.dist-info/METADATA,sha256=N1PBQQAjEWCY0f4Fi3O0u5MqRCSKlCtGAnLO9WdK51k,60288
9
+ rgwfuncs-0.0.67.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
+ rgwfuncs-0.0.67.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
11
+ rgwfuncs-0.0.67.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
12
+ rgwfuncs-0.0.67.dist-info/RECORD,,