PyPI - rgwfuncs - Versions diffs - 0.0.92__py3-none-any.whl → 0.0.94__py3-none-any.whl - Mend

rgwfuncs 0.0.92py3-none-any.whl → 0.0.94py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

rgwfuncs/df_lib.py CHANGED Viewed

@@ -1279,7 +1279,7 @@ def append_ranged_classification_column(df: pd.DataFrame, ranges: List[Union[int
                 decimal=True
             )
             if i == len(ranges) - 1:
-                label = f"{start} to infinity"
+                label = f"{start}+"
             else:
                 end = pad_number(
                     ranges[i + 1],
@@ -1287,7 +1287,7 @@ def append_ranged_classification_column(df: pd.DataFrame, ranges: List[Union[int
                     max_decimal_length,
                     decimal=True
                 )
-                label = f"{start} to {end}"
+                label = f"{start} - {end}"
             labels.append(label)
     else:
@@ -1298,10 +1298,10 @@ def append_ranged_classification_column(df: pd.DataFrame, ranges: List[Union[int
         for i in range(len(ranges)):
             start = pad_number(ranges[i], max_integer_length)
             if i == len(ranges) - 1:
-                label = f"{start} to infinity"
+                label = f"{start}+"
             else:
                 end = pad_number(ranges[i + 1], max_integer_length)
-                label = f"{start} to {end}"
+                label = f"{start} - {end}"
             labels.append(label)
     # Ensure the target column is numeric
@@ -1365,7 +1365,7 @@ def append_percentile_classification_column(df: pd.DataFrame, percentiles: List[
                 max_decimal_length,
                 decimal=True
             )
-            label = f"{start} to {end}"
+            label = f"{start} - {end}"
             labels.append(label)
     else:
         percentiles_list = [int(p) for p in percentiles]
@@ -1375,7 +1375,7 @@ def append_percentile_classification_column(df: pd.DataFrame, percentiles: List[
         for i in range(len(percentiles_list) - 1):
             start = pad_number(percentiles_list[i], max_integer_length)
             end = pad_number(percentiles_list[i + 1], max_integer_length)
-            label = f"{start} to {end}"
+            label = f"{start} - {end}"
             labels.append(label)
     # Ensure the target column is numeric
@@ -1412,7 +1412,7 @@ def append_ranged_date_classification_column(df: pd.DataFrame, date_ranges: list
     for i in range(len(date_list) - 1):
         start_date = date_list[i].strftime('%Y-%m-%d')
         end_date = date_list[i + 1].strftime('%Y-%m-%d')
-        label = f"{start_date} to {end_date}"
+        label = f"{start_date} - {end_date}"
         labels.append(label)
     df[new_col_name] = pd.cut(
@@ -2085,7 +2085,7 @@ def sync_dataframe_to_sqlite_database(
             conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
-def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int) -> pd.DataFrame:
+def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], cache_dir: str, file_prefix: str, cache_cutoff_hours: int, dtype: dict = None) -> pd.DataFrame:
     """
     Retrieve data from a cache if a recent cache file exists, or fetch fresh data, save it to the cache, and return it.
@@ -2103,16 +2103,18 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
     - cache_cutoff_hours (int):
         The maximum age of a cache file (in hours) to be considered valid.
         If no file is fresh enough, fresh data will be fetched.
+    - dtype (dict, optional):
+        A dictionary specifying the data types for columns when reading the CSV cache file.
+        Passed to pd.read_csv() to handle mixed-type columns explicitly. Defaults to None.
     Returns:
     - pd.DataFrame:
         The pandas DataFrame containing either cached or freshly fetched data.
     """
     # Ensure the directory exists
     os.makedirs(cache_dir, exist_ok=True)
-    # Generate the current timestamp in the required format
+    # Generate the current timestamp
     now: datetime = datetime.now()
     # Initialize cache file details
@@ -2133,7 +2135,7 @@ def load_fresh_data_or_pull_from_cache(fetch_func: Callable[[], pd.DataFrame], c
     # If a valid cache exists and is within the cutoff time, read from it
     if latest_cache_time and now - latest_cache_time < timedelta(hours=cache_cutoff_hours):
-        df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename))
+        df: pd.DataFrame = pd.read_csv(os.path.join(cache_dir, latest_cache_filename), dtype=dtype)
     else:
         # Fetch new data via the provided function
         df = fetch_func()

{rgwfuncs-0.0.92.dist-info → rgwfuncs-0.0.94.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rgwfuncs
-Version: 0.0.92
+Version: 0.0.94
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson
@@ -1734,32 +1734,39 @@ Processes and saves a DataFrame to an SQLite database, adding a timestamp column
 --------------------------------------------------------------------------------
 ### 46. `load_fresh_data_or_pull_from_cache`
-Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn't exist, it uses a fetching function to get new data, which it caches and returns.
-• Parameters:
-  - `fetch_func` (typing.Callable[[], pd.DataFrame]): A callable function that fetches fresh data and returns it as a pandas DataFrame.
-  - `cache_dir` (str): The directory where cache files are stored.
-  - `file_prefix` (str): The prefix used for cache filenames to identify relevant cache files.
-  - `cache_cutoff_hours` (int): The age in hours beyond which a cache file is considered obsolete.
+Retrieves data from a cache if a recent cache file exists, or fetches fresh data, saves it to the cache, and returns it. If the cache is too old or doesn’t exist, it uses a fetching function to get new data, which it caches and returns. An optional `dtype` parameter allows specifying column data types when reading from the cache, preventing issues with mixed-type columns.
-• Returns:
-  - `pd.DataFrame`: The DataFrame containing cached or freshly fetched data.
+#### Parameters:
+- **`fetch_func` (typing.Callable[[], pd.DataFrame])**: A callable function that fetches fresh data and returns it as a pandas DataFrame.
+- **`cache_dir` (str)**: The directory where cache files are stored.
+- **`file_prefix` (str)**: The prefix used for cache filenames to identify relevant cache files.
+- **`cache_cutoff_hours` (int)**: The age in hours beyond which a cache file is considered obsolete.
+- **`dtype` (dict, optional)**: A dictionary specifying the data types for columns when reading the CSV cache file. Passed to `pd.read_csv()` to handle mixed-type columns explicitly. Defaults to `None`, in which case pandas infers the types.
-• Example:
+#### Returns:
+- **`pd.DataFrame`**: The DataFrame containing cached or freshly fetched data.
+#### Example:
     from rgwfuncs import load_fresh_data_or_pull_from_cache
     import pandas as pd
     def fetch_data():
         # This is your data-fetching logic. Replace with real fetching code.
-        return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': [4, 5, 6]})
+        return pd.DataFrame({'Column1': [1, 2, 3], 'Column2': ['4', '5', '6']})
     cache_dir = 'cache_directory'
     file_prefix = 'cached_data'
     cache_cutoff_hours = 24
+    # Without dtype (pandas infers types)
     df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours)
+    # With dtype to handle mixed types
+    df = load_fresh_data_or_pull_from_cache(fetch_data, cache_dir, file_prefix, cache_cutoff_hours, dtype={'Column2': str})
+    print(df)
 --------------------------------------------------------------------------------
 ## Additional Info

{rgwfuncs-0.0.92.dist-info → rgwfuncs-0.0.94.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
 rgwfuncs/__init__.py,sha256=LSn54Tlyskcb6Wab_wUpPLB6UGMe5LdrB3GU88mDEbU,1712
 rgwfuncs/algebra_lib.py,sha256=rKFITfpWfgdBswnbMUuS41XgndEt-jUVz2ObO_ik7eM,42234
-rgwfuncs/df_lib.py,sha256=uhP5qv1PTBNTuZSzUe_-Qwwtm20rPU8JpEQa8OEetHk,75555
+rgwfuncs/df_lib.py,sha256=SUEjUc8kCELtbQE2luMsBGh18aTWS97Wb5s3RdMcmHc,75750
 rgwfuncs/docs_lib.py,sha256=i63NzX-V8cGhikYdtkRGAEe2VcuwpXxDUyTRa9xI7l8,1972
 rgwfuncs/interactive_shell_lib.py,sha256=YN0ZnM5twIsOeDKuOQ9ZGURCvvBX0RZjM4a1vO1C3E8,4281
 rgwfuncs/str_lib.py,sha256=hE0VfP6rhQpczsKyCZvH3G1aMRwngKnkW3NTYCEc0Po,3208
-rgwfuncs-0.0.92.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
-rgwfuncs-0.0.92.dist-info/METADATA,sha256=Vx7bicfYGVHY2ER5s4gpjDdNsVYsfQx6_2kbLGS6EVU,61443
-rgwfuncs-0.0.92.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-rgwfuncs-0.0.92.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
-rgwfuncs-0.0.92.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
-rgwfuncs-0.0.92.dist-info/RECORD,,
+rgwfuncs-0.0.94.dist-info/licenses/LICENSE,sha256=jLvt20gcUZYB8UOvyBvyKQ1qhYYhD__qP7ZDx2lPFkU,1062
+rgwfuncs-0.0.94.dist-info/METADATA,sha256=K0ehKuNHmsn7IbtRWM8o7_323F8RUnYNtry2QHosFWo,62066
+rgwfuncs-0.0.94.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+rgwfuncs-0.0.94.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
+rgwfuncs-0.0.94.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
+rgwfuncs-0.0.94.dist-info/RECORD,,

{rgwfuncs-0.0.92.dist-info → rgwfuncs-0.0.94.dist-info}/WHEEL RENAMED Viewed

File without changes

{rgwfuncs-0.0.92.dist-info → rgwfuncs-0.0.94.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rgwfuncs-0.0.92.dist-info → rgwfuncs-0.0.94.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rgwfuncs-0.0.92.dist-info → rgwfuncs-0.0.94.dist-info}/top_level.txt RENAMED Viewed

File without changes

rgwfuncs 0.0.92__py3-none-any.whl → 0.0.94__py3-none-any.whl

rgwfuncs 0.0.92py3-none-any.whl → 0.0.94py3-none-any.whl