PyPI - rgwfuncs - Versions diffs - 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl - Mend

rgwfuncs 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

rgwfuncs/df_lib.py CHANGED Viewed

@@ -61,6 +61,7 @@ def docs(method_type_filter: Optional[str] = None) -> None:
                     # Print the entire docstring for the matching function
                     print(f"\n{name}:\n{docstring}")
 def numeric_clean(df: pd.DataFrame, column_names: str, column_type: str, irregular_value_treatment: str) -> pd.DataFrame:
     """
     Cleans the numeric columns based on specified treatments.
@@ -1628,28 +1629,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     Raises:
         ValueError: If the DataFrames do not have the same columns.
     """
-    if set(df1.columns) != set(df2.columns):
-        raise ValueError("Both DataFrames must have the same columns for a union join")
     # Inspect initial columns
-    print("Initial df1 columns:", df1.columns)
-    print("Initial df2 columns:", df2.columns)
+    # print("Initial df1 columns:", df1.columns)
+    # print("Initial df2 columns:", df2.columns)
-    # Drop all-NA columns, if any
-    df1_clean = df1.dropna(axis=1, how='all')
-    df2_clean = df2.dropna(axis=1, how='all')
+    # Standardize columns by adding missing columns filled with NaN
+    for col in df2.columns:
+        if col not in df1:
+            df1[col] = pd.NA
-    # Inspect resulting columns after dropping all-NA columns
-    print("Cleaned df1 columns:", df1_clean.columns)
-    print("Cleaned df2 columns:", df2_clean.columns)
+    for col in df1.columns:
+        if col not in df2:
+            df2[col] = pd.NA
-    # Ensure they still have the same columns after dropping all-NA columns
-    if set(df1_clean.columns) != set(df2_clean.columns):
-        raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
+    # print("Standardized df1 columns:", df1.columns)
+    # print("Standardized df2 columns:", df2.columns)
-    result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
-    return result_df
+    # Check if columns match now
+    if set(df1.columns) != set(df2.columns):
+        raise ValueError("Both DataFrames must have the same columns after standardizing columns")
+    # Concatenate and drop duplicates
+    result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
+    return result_df
 def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
@@ -1666,18 +1668,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     Raises:
         ValueError: If the DataFrames do not have the same columns.
     """
-    if set(df1.columns) != set(df2.columns):
-        raise ValueError("Both DataFrames must have the same columns for a bag union join")
+    # Inspect initial columns
+    # print("Initial df1 columns:", df1.columns)
+    # print("Initial df2 columns:", df2.columns)
+    # Standardize columns by adding missing columns filled with NaN
+    for col in df2.columns:
+        if col not in df1:
+            df1[col] = pd.NA
-    # Drop all-NA columns, if any
-    df1_clean = df1.dropna(axis=1, how='all')
-    df2_clean = df2.dropna(axis=1, how='all')
+    for col in df1.columns:
+        if col not in df2:
+            df2[col] = pd.NA
-    # Ensure they still have the same columns after dropping all-NA columns
-    if set(df1_clean.columns) != set(df2_clean.columns):
-        raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
+    # print("Standardized df1 columns:", df1.columns)
+    # print("Standardized df2 columns:", df2.columns)
+    # Ensure they have the same columns after standardizing
+    if set(df1.columns) != set(df2.columns):
+        raise ValueError("Both DataFrames must have the same columns after standardizing columns")
-    result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
+    # Concatenate without dropping duplicates
+    result_df = pd.concat([df1, df2], ignore_index=True)
     return result_df
@@ -1712,6 +1724,7 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
     """
     return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
 def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
     """
     Processes and saves a DataFrame to an SQLite database, adding a timestamp column
@@ -1766,5 +1779,3 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
             conn.execute(f"DROP TABLE IF EXISTS {tablename}")
             # Rename the new table to the old table name
             conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.13.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.11
+Version: 0.0.13
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson

rgwfuncs-0.0.13.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
+rgwfuncs/df_lib.py,sha256=2JU1Z0wjBrpcmLuJgY6-Bi1OFXHyNk1XeRGESkDI934,64077
+rgwfuncs-0.0.13.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
+rgwfuncs-0.0.13.dist-info/METADATA,sha256=TZoSn8ANcHG2IOAW1SwNbu3ZnPKxdWbKzoCAXLTB06w,32059
+rgwfuncs-0.0.13.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+rgwfuncs-0.0.13.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
+rgwfuncs-0.0.13.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
+rgwfuncs-0.0.13.dist-info/RECORD,,

rgwfuncs-0.0.11.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
-rgwfuncs/df_lib.py,sha256=N5XCe9LYMIVjh0pg2Xc1i3Md--mdNJyJ5t7NpO9x4-s,63973
-rgwfuncs-0.0.11.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
-rgwfuncs-0.0.11.dist-info/METADATA,sha256=mqOIwsthIDAdpgtcFt35vW_xc99f2xtu5O_k4_Y-wQY,32059
-rgwfuncs-0.0.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-rgwfuncs-0.0.11.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
-rgwfuncs-0.0.11.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
-rgwfuncs-0.0.11.dist-info/RECORD,,

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.13.dist-info}/LICENSE RENAMED Viewed

File without changes

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.13.dist-info}/WHEEL RENAMED Viewed

File without changes

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.13.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.13.dist-info}/top_level.txt RENAMED Viewed

File without changes

rgwfuncs 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

rgwfuncs 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl