rgwfuncs 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
rgwfuncs/df_lib.py CHANGED
@@ -1628,29 +1628,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1628
1628
  Raises:
1629
1629
  ValueError: If the DataFrames do not have the same columns.
1630
1630
  """
1631
- if set(df1.columns) != set(df2.columns):
1632
- raise ValueError("Both DataFrames must have the same columns for a union join")
1633
-
1634
1631
  # Inspect initial columns
1635
1632
  print("Initial df1 columns:", df1.columns)
1636
1633
  print("Initial df2 columns:", df2.columns)
1637
1634
 
1638
- # Drop all-NA columns, if any
1639
- df1_clean = df1.dropna(axis=1, how='all')
1640
- df2_clean = df2.dropna(axis=1, how='all')
1635
+ # Standardize columns by adding missing columns filled with NaN
1636
+ for col in df2.columns:
1637
+ if col not in df1:
1638
+ df1[col] = pd.NA
1641
1639
 
1642
- # Inspect resulting columns after dropping all-NA columns
1643
- print("Cleaned df1 columns:", df1_clean.columns)
1644
- print("Cleaned df2 columns:", df2_clean.columns)
1640
+ for col in df1.columns:
1641
+ if col not in df2:
1642
+ df2[col] = pd.NA
1645
1643
 
1646
- # Ensure they still have the same columns after dropping all-NA columns
1647
- if set(df1_clean.columns) != set(df2_clean.columns):
1648
- raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1649
-
1650
- result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
1651
- return result_df
1644
+ print("Standardized df1 columns:", df1.columns)
1645
+ print("Standardized df2 columns:", df2.columns)
1652
1646
 
1647
+ # Check if columns match now
1648
+ if set(df1.columns) != set(df2.columns):
1649
+ raise ValueError("Both DataFrames must have the same columns after standardizing columns")
1653
1650
 
1651
+ # Concatenate and drop duplicates
1652
+ result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1653
+ return result_df
1654
1654
 
1655
1655
  def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1656
1656
  """
@@ -1666,18 +1666,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1666
1666
  Raises:
1667
1667
  ValueError: If the DataFrames do not have the same columns.
1668
1668
  """
1669
- if set(df1.columns) != set(df2.columns):
1670
- raise ValueError("Both DataFrames must have the same columns for a bag union join")
1669
+ # Inspect initial columns
1670
+ print("Initial df1 columns:", df1.columns)
1671
+ print("Initial df2 columns:", df2.columns)
1671
1672
 
1672
- # Drop all-NA columns, if any
1673
- df1_clean = df1.dropna(axis=1, how='all')
1674
- df2_clean = df2.dropna(axis=1, how='all')
1673
+ # Standardize columns by adding missing columns filled with NaN
1674
+ for col in df2.columns:
1675
+ if col not in df1:
1676
+ df1[col] = pd.NA
1675
1677
 
1676
- # Ensure they still have the same columns after dropping all-NA columns
1677
- if set(df1_clean.columns) != set(df2_clean.columns):
1678
- raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1678
+ for col in df1.columns:
1679
+ if col not in df2:
1680
+ df2[col] = pd.NA
1681
+
1682
+ print("Standardized df1 columns:", df1.columns)
1683
+ print("Standardized df2 columns:", df2.columns)
1684
+
1685
+ # Ensure they have the same columns after standardizing
1686
+ if set(df1.columns) != set(df2.columns):
1687
+ raise ValueError("Both DataFrames must have the same columns after standardizing columns")
1679
1688
 
1680
- result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
1689
+ # Concatenate without dropping duplicates
1690
+ result_df = pd.concat([df1, df2], ignore_index=True)
1681
1691
  return result_df
1682
1692
 
1683
1693
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -0,0 +1,8 @@
1
+ rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
2
+ rgwfuncs/df_lib.py,sha256=mIOEvnqpkkHW2Az23_09Xt61nltTtG6uNW0b81aW5Mg,64060
3
+ rgwfuncs-0.0.12.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
+ rgwfuncs-0.0.12.dist-info/METADATA,sha256=rgeOUbLIXmgb-JuqmAcdF2G2vBS7iWCdsop74rfuTfc,32059
5
+ rgwfuncs-0.0.12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ rgwfuncs-0.0.12.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
+ rgwfuncs-0.0.12.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
+ rgwfuncs-0.0.12.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
2
- rgwfuncs/df_lib.py,sha256=N5XCe9LYMIVjh0pg2Xc1i3Md--mdNJyJ5t7NpO9x4-s,63973
3
- rgwfuncs-0.0.11.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
- rgwfuncs-0.0.11.dist-info/METADATA,sha256=mqOIwsthIDAdpgtcFt35vW_xc99f2xtu5O_k4_Y-wQY,32059
5
- rgwfuncs-0.0.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
- rgwfuncs-0.0.11.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
- rgwfuncs-0.0.11.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
- rgwfuncs-0.0.11.dist-info/RECORD,,