PyPI - rgwfuncs - Versions diffs - 0.0.10__tar.gz → 0.0.12__tar.gz - Mend

rgwfuncs 0.0.10.tar.gz → 0.0.12.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

{rgwfuncs-0.0.10/src/rgwfuncs.egg-info → rgwfuncs-0.0.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.10
+Version: 0.0.12
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson

{rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "rgwfuncs"
-version = "0.0.10"
+version = "0.0.12"
 authors = [
   { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
 ]

{rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = rgwfuncs
-version = 0.0.10
+version = 0.0.12
 author = Ryan Gerard Wilson
 author_email = ryangerardwilson@gmail.com
 description = A functional programming paradigm for mathematical modelling and data science

{rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs/df_lib.py RENAMED Viewed

@@ -1628,20 +1628,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     Raises:
         ValueError: If the DataFrames do not have the same columns.
     """
-    if set(df1.columns) != set(df2.columns):
-        raise ValueError("Both DataFrames must have the same columns for a union join")
+    # Inspect initial columns
+    print("Initial df1 columns:", df1.columns)
+    print("Initial df2 columns:", df2.columns)
-    # Drop all-NA columns, if any
-    df1_clean = df1.dropna(axis=1, how='all')
-    df2_clean = df2.dropna(axis=1, how='all')
+    # Standardize columns by adding missing columns filled with NaN
+    for col in df2.columns:
+        if col not in df1:
+            df1[col] = pd.NA
-    # Ensure they still have the same columns after dropping all-NA columns
-    if set(df1_clean.columns) != set(df2_clean.columns):
-        raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
-    result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
-    return result_df
+    for col in df1.columns:
+        if col not in df2:
+            df2[col] = pd.NA
+    print("Standardized df1 columns:", df1.columns)
+    print("Standardized df2 columns:", df2.columns)
+    # Check if columns match now
+    if set(df1.columns) != set(df2.columns):
+        raise ValueError("Both DataFrames must have the same columns after standardizing columns")
+    # Concatenate and drop duplicates
+    result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
+    return result_df
 def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     """
@@ -1657,18 +1666,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     Raises:
         ValueError: If the DataFrames do not have the same columns.
     """
-    if set(df1.columns) != set(df2.columns):
-        raise ValueError("Both DataFrames must have the same columns for a bag union join")
+    # Inspect initial columns
+    print("Initial df1 columns:", df1.columns)
+    print("Initial df2 columns:", df2.columns)
+    # Standardize columns by adding missing columns filled with NaN
+    for col in df2.columns:
+        if col not in df1:
+            df1[col] = pd.NA
-    # Drop all-NA columns, if any
-    df1_clean = df1.dropna(axis=1, how='all')
-    df2_clean = df2.dropna(axis=1, how='all')
+    for col in df1.columns:
+        if col not in df2:
+            df2[col] = pd.NA
-    # Ensure they still have the same columns after dropping all-NA columns
-    if set(df1_clean.columns) != set(df2_clean.columns):
-        raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
+    print("Standardized df1 columns:", df1.columns)
+    print("Standardized df2 columns:", df2.columns)
+    # Ensure they have the same columns after standardizing
+    if set(df1.columns) != set(df2.columns):
+        raise ValueError("Both DataFrames must have the same columns after standardizing columns")
-    result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
+    # Concatenate without dropping duplicates
+    result_df = pd.concat([df1, df2], ignore_index=True)
     return result_df

{rgwfuncs-0.0.10 → rgwfuncs-0.0.12/src/rgwfuncs.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.10
+Version: 0.0.12
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson