PyPI - rgwfuncs - Versions diffs - 0.0.58__tar.gz → 0.0.60__tar.gz - Mend

rgwfuncs 0.0.58__tar.gz → 0.0.60__tar.gz

Files changed (17) hide show

{rgwfuncs-0.0.58/src/rgwfuncs.egg-info → rgwfuncs-0.0.60}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.58
+Version: 0.0.60
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson

{rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "rgwfuncs"
-version = "0.0.58"
+version = "0.0.60"
 authors = [
   { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
 ]

{rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = rgwfuncs
-version = 0.0.58
+version = 0.0.60
 author = Ryan Gerard Wilson
 author_email = ryangerardwilson@gmail.com
 description = A functional programming paradigm for mathematical modelling and data science

{rgwfuncs-0.0.58 → rgwfuncs-0.0.60}/src/rgwfuncs/df_lib.py RENAMED Viewed

@@ -212,9 +212,7 @@ def update_rows(
     invalid_cols = [col for col in updates if col not in df.columns]
     if invalid_cols:
-        raise ValueError(
-            f"Columns {
-                ', '.join(invalid_cols)} do not exist in the DataFrame.")
+        raise ValueError(f"Columns {', '.join(invalid_cols)} do not exist in the DataFrame.")
     new_df = df.copy()
     for col_name, new_value in updates.items():
@@ -645,11 +643,7 @@ def top_n_unique_values(df: pd.DataFrame, n: int, columns: List[str]) -> None:
                 top_n_values = frequency.nlargest(n)
                 report[column] = {str(value): str(count)
                                   for value, count in top_n_values.items()}
-                print(
-                    f"Top {n} unique values for column '{column}':\n{
-                        json.dumps(
-                            report[column],
-                            indent=2)}\n")
+                print(f"Top {n} unique values for column '{column}':\n{json.dumps(report[column], indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
     else:
@@ -692,11 +686,7 @@ def bottom_n_unique_values(
                 report[column] = {
                     str(value): str(count) for value,
                     count in bottom_n_values.items()}
-                print(
-                    f"Bottom {n} unique values for column '{column}':\n{
-                        json.dumps(
-                            report[column],
-                            indent=2)}\n")
+                print(f"Bottom {n} unique values for column '{column}':\n{json.dumps(report[column],indent=2)}\n")
             else:
                 print(f"Column '{column}' does not exist in the DataFrame.")
     else:
@@ -755,8 +745,7 @@ def print_memory_usage(df: pd.DataFrame) -> None:
     - ValueError: If the DataFrame is `None`.
     """
     if df is not None:
-        memory_usage = df.memory_usage(deep=True).sum(
-        ) / (1024 * 1024)  # Convert bytes to MB
+        memory_usage = df.memory_usage(deep=True).sum() / (1024 * 1024)  # Convert bytes to MB
         print(f"Memory usage of DataFrame: {memory_usage:.2f} MB")
     else:
         raise ValueError("No DataFrame to print. Please provide a DataFrame.")
@@ -1236,9 +1225,7 @@ def append_ranged_classification_column(
             for r in range_list
         )
-        labels = [f"{pad_number(range_list[i],
-                                max_integer_length)} to {pad_number(range_list[i + 1],
-                                                                    max_integer_length)}" for i in range(len(range_list) - 1)]
+        labels = [f"{pad_number(range_list[i],max_integer_length)} to {pad_number(range_list[i + 1], max_integer_length)}" for i in range(len(range_list) - 1)]
     # Ensure the target column is numeric
     df[target_col] = pd.to_numeric(df[target_col], errors='coerce')
@@ -1381,8 +1368,7 @@ def rename_columns(df: pd.DataFrame,
         A new DataFrame with columns renamed.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to rename columns. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to rename columns. Please provide a valid DataFrame.")
     return df.rename(columns=rename_pairs)
@@ -1400,8 +1386,7 @@ def cascade_sort(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
         A new DataFrame sorted by specified columns.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to sort. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to sort. Please provide a valid DataFrame.")
     col_names = []
     asc_order = []
@@ -1436,8 +1421,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         A new DataFrame with XGB_TYPE labels appended.
     """
     if df is None:
-        raise ValueError(
-            "No DataFrame to add labels. Please provide a valid DataFrame.")
+        raise ValueError("No DataFrame to add labels. Please provide a valid DataFrame.")
     ratios = list(map(int, ratio_str.split(':')))
     total_ratio = sum(ratios)
@@ -1454,8 +1438,7 @@ def append_xgb_labels(df: pd.DataFrame, ratio_str: str) -> pd.DataFrame:
         labels = ['TRAIN'] * train_rows + ['VALIDATE'] * \
             validate_rows + ['TEST'] * test_rows
     else:
-        raise ValueError(
-            "Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
+        raise ValueError("Invalid ratio string format. Use 'TRAIN:TEST' or 'TRAIN:VALIDATE:TEST'.")
     df_with_labels = df.copy()
     df_with_labels['XGB_TYPE'] = labels
@@ -1485,8 +1468,7 @@ def append_xgb_regression_predictions(
         DataFrame with predictions appended.
     """
     if df is None or 'XGB_TYPE' not in df.columns:
-        raise ValueError(
-            "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+        raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
     features = feature_cols.replace(' ', '').split(',')
@@ -1560,8 +1542,7 @@ def append_xgb_logistic_regression_predictions(
         DataFrame with predictions appended.
     """
     if df is None or 'XGB_TYPE' not in df.columns:
-        raise ValueError(
-            "DataFrame is not initialized or 'XGB_TYPE' column is missing.")
+        raise ValueError("DataFrame is not initialized or 'XGB_TYPE' column is missing.")
     features = feature_cols.replace(' ', '').split(',')
@@ -1605,8 +1586,7 @@ def append_xgb_logistic_regression_predictions(
     if model_path:
         model.save_model(model_path)
-    columns_order = [col for col in df.columns if col not in [
-        'XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
+    columns_order = [col for col in df.columns if col not in ['XGB_TYPE', target_col, pred_col]] + ['XGB_TYPE', target_col, pred_col]
     df = df[columns_order]
     return df
@@ -1854,8 +1834,7 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
-            "Both DataFrames must have the same columns for a union join")
+        raise ValueError("Both DataFrames must have the same columns for a union join")
     result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
     return result_df
@@ -1876,8 +1855,7 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
         ValueError: If the DataFrames do not have the same columns.
     """
     if set(df1.columns) != set(df2.columns):
-        raise ValueError(
-            "Both DataFrames must have the same columns for a bag union join")
+        raise ValueError("Both DataFrames must have the same columns for a bag union join")
     result_df = pd.concat([df1, df2], ignore_index=True)
     return result_df
@@ -2026,12 +2004,7 @@ def sync_dataframe_to_sqlite_database(
         cursor.execute(f"PRAGMA table_info({new_table_name})")
         if cursor.fetchall() == []:  # Table does not exist
             # Create a table using the DataFrame's column names and types
-            columns_with_types = ', '.join(
-                f'"{col}" {
-                    map_dtype(dtype)}' for col,
-                dtype in zip(
-                    df.columns,
-                    df.dtypes))
+            columns_with_types = ', '.join(f'"{col}" {map_dtype(dtype)}' for col,dtype in zip(df.columns,df.dtypes))
             create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
             conn.execute(create_table_query)

{rgwfuncs-0.0.58 → rgwfuncs-0.0.60/src/rgwfuncs.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.58
+Version: 0.0.60
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson