PyPI - rgwfuncs - Versions diffs - 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl - Mend

rgwfuncs 0.0.11-py3-none-any.whl → 0.0.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

rgwfuncs/df_lib.py CHANGED Viewed

@@ -1628,29 +1628,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     Raises:
         ValueError: If the DataFrames do not have the same columns.
     """
-    if set(df1.columns) != set(df2.columns):
-        raise ValueError("Both DataFrames must have the same columns for a union join")
     # Inspect initial columns
     print("Initial df1 columns:", df1.columns)
     print("Initial df2 columns:", df2.columns)
-    # Drop all-NA columns, if any
-    df1_clean = df1.dropna(axis=1, how='all')
-    df2_clean = df2.dropna(axis=1, how='all')
+    # Standardize columns by adding missing columns filled with NaN
+    for col in df2.columns:
+        if col not in df1:
+            df1[col] = pd.NA
-    # Inspect resulting columns after dropping all-NA columns
-    print("Cleaned df1 columns:", df1_clean.columns)
-    print("Cleaned df2 columns:", df2_clean.columns)
+    for col in df1.columns:
+        if col not in df2:
+            df2[col] = pd.NA
-    # Ensure they still have the same columns after dropping all-NA columns
-    if set(df1_clean.columns) != set(df2_clean.columns):
-        raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
-    result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
-    return result_df
+    print("Standardized df1 columns:", df1.columns)
+    print("Standardized df2 columns:", df2.columns)
+    # Check if columns match now
+    if set(df1.columns) != set(df2.columns):
+        raise ValueError("Both DataFrames must have the same columns after standardizing columns")
+    # Concatenate and drop duplicates
+    result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
+    return result_df
 def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     """
@@ -1666,18 +1666,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
     Raises:
         ValueError: If the DataFrames do not have the same columns.
     """
-    if set(df1.columns) != set(df2.columns):
-        raise ValueError("Both DataFrames must have the same columns for a bag union join")
+    # Inspect initial columns
+    print("Initial df1 columns:", df1.columns)
+    print("Initial df2 columns:", df2.columns)
-    # Drop all-NA columns, if any
-    df1_clean = df1.dropna(axis=1, how='all')
-    df2_clean = df2.dropna(axis=1, how='all')
+    # Standardize columns by adding missing columns filled with NaN
+    for col in df2.columns:
+        if col not in df1:
+            df1[col] = pd.NA
-    # Ensure they still have the same columns after dropping all-NA columns
-    if set(df1_clean.columns) != set(df2_clean.columns):
-        raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
+    for col in df1.columns:
+        if col not in df2:
+            df2[col] = pd.NA
+    print("Standardized df1 columns:", df1.columns)
+    print("Standardized df2 columns:", df2.columns)
+    # Ensure they have the same columns after standardizing
+    if set(df1.columns) != set(df2.columns):
+        raise ValueError("Both DataFrames must have the same columns after standardizing columns")
-    result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
+    # Concatenate without dropping duplicates
+    result_df = pd.concat([df1, df2], ignore_index=True)
     return result_df

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.11
+Version: 0.0.12
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson

rgwfuncs-0.0.12.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
+rgwfuncs/df_lib.py,sha256=mIOEvnqpkkHW2Az23_09Xt61nltTtG6uNW0b81aW5Mg,64060
+rgwfuncs-0.0.12.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
+rgwfuncs-0.0.12.dist-info/METADATA,sha256=rgeOUbLIXmgb-JuqmAcdF2G2vBS7iWCdsop74rfuTfc,32059
+rgwfuncs-0.0.12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+rgwfuncs-0.0.12.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
+rgwfuncs-0.0.12.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
+rgwfuncs-0.0.12.dist-info/RECORD,,

rgwfuncs-0.0.11.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
-rgwfuncs/df_lib.py,sha256=N5XCe9LYMIVjh0pg2Xc1i3Md--mdNJyJ5t7NpO9x4-s,63973
-rgwfuncs-0.0.11.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
-rgwfuncs-0.0.11.dist-info/METADATA,sha256=mqOIwsthIDAdpgtcFt35vW_xc99f2xtu5O_k4_Y-wQY,32059
-rgwfuncs-0.0.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-rgwfuncs-0.0.11.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
-rgwfuncs-0.0.11.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
-rgwfuncs-0.0.11.dist-info/RECORD,,

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/LICENSE RENAMED Viewed

File without changes

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/top_level.txt RENAMED Viewed

File without changes

rgwfuncs 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

rgwfuncs 0.0.11-py3-none-any.whl → 0.0.12-py3-none-any.whl