rgwfuncs 0.0.10__tar.gz → 0.0.12__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rgwfuncs"
7
- version = "0.0.10"
7
+ version = "0.0.12"
8
8
  authors = [
9
9
  { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = rgwfuncs
3
- version = 0.0.10
3
+ version = 0.0.12
4
4
  author = Ryan Gerard Wilson
5
5
  author_email = ryangerardwilson@gmail.com
6
6
  description = A functional programming paradigm for mathematical modelling and data science
@@ -1628,20 +1628,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1628
1628
  Raises:
1629
1629
  ValueError: If the DataFrames do not have the same columns.
1630
1630
  """
1631
- if set(df1.columns) != set(df2.columns):
1632
- raise ValueError("Both DataFrames must have the same columns for a union join")
1631
+ # Inspect initial columns
1632
+ print("Initial df1 columns:", df1.columns)
1633
+ print("Initial df2 columns:", df2.columns)
1633
1634
 
1634
- # Drop all-NA columns, if any
1635
- df1_clean = df1.dropna(axis=1, how='all')
1636
- df2_clean = df2.dropna(axis=1, how='all')
1635
+ # Standardize columns by adding missing columns filled with NaN
1636
+ for col in df2.columns:
1637
+ if col not in df1:
1638
+ df1[col] = pd.NA
1637
1639
 
1638
- # Ensure they still have the same columns after dropping all-NA columns
1639
- if set(df1_clean.columns) != set(df2_clean.columns):
1640
- raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1641
-
1642
- result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
1643
- return result_df
1640
+ for col in df1.columns:
1641
+ if col not in df2:
1642
+ df2[col] = pd.NA
1643
+
1644
+ print("Standardized df1 columns:", df1.columns)
1645
+ print("Standardized df2 columns:", df2.columns)
1646
+
1647
+ # Check if columns match now
1648
+ if set(df1.columns) != set(df2.columns):
1649
+ raise ValueError("Both DataFrames must have the same columns after standardizing columns")
1644
1650
 
1651
+ # Concatenate and drop duplicates
1652
+ result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1653
+ return result_df
1645
1654
 
1646
1655
  def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1647
1656
  """
@@ -1657,18 +1666,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1657
1666
  Raises:
1658
1667
  ValueError: If the DataFrames do not have the same columns.
1659
1668
  """
1660
- if set(df1.columns) != set(df2.columns):
1661
- raise ValueError("Both DataFrames must have the same columns for a bag union join")
1669
+ # Inspect initial columns
1670
+ print("Initial df1 columns:", df1.columns)
1671
+ print("Initial df2 columns:", df2.columns)
1672
+
1673
+ # Standardize columns by adding missing columns filled with NaN
1674
+ for col in df2.columns:
1675
+ if col not in df1:
1676
+ df1[col] = pd.NA
1662
1677
 
1663
- # Drop all-NA columns, if any
1664
- df1_clean = df1.dropna(axis=1, how='all')
1665
- df2_clean = df2.dropna(axis=1, how='all')
1678
+ for col in df1.columns:
1679
+ if col not in df2:
1680
+ df2[col] = pd.NA
1666
1681
 
1667
- # Ensure they still have the same columns after dropping all-NA columns
1668
- if set(df1_clean.columns) != set(df2_clean.columns):
1669
- raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1682
+ print("Standardized df1 columns:", df1.columns)
1683
+ print("Standardized df2 columns:", df2.columns)
1684
+
1685
+ # Ensure they have the same columns after standardizing
1686
+ if set(df1.columns) != set(df2.columns):
1687
+ raise ValueError("Both DataFrames must have the same columns after standardizing columns")
1670
1688
 
1671
- result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
1689
+ # Concatenate without dropping duplicates
1690
+ result_df = pd.concat([df1, df2], ignore_index=True)
1672
1691
  return result_df
1673
1692
 
1674
1693
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
File without changes
File without changes