rgwfuncs 0.0.9__tar.gz → 0.0.11__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.9
3
+ Version: 0.0.11
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rgwfuncs"
7
- version = "0.0.9"
7
+ version = "0.0.11"
8
8
  authors = [
9
9
  { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = rgwfuncs
3
- version = 0.0.9
3
+ version = 0.0.11
4
4
  author = Ryan Gerard Wilson
5
5
  author_email = ryangerardwilson@gmail.com
6
6
  description = A functional programming paradigm for mathematical modelling and data science
@@ -1631,10 +1631,27 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1631
1631
  if set(df1.columns) != set(df2.columns):
1632
1632
  raise ValueError("Both DataFrames must have the same columns for a union join")
1633
1633
 
1634
- result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1634
+ # Inspect initial columns
1635
+ print("Initial df1 columns:", df1.columns)
1636
+ print("Initial df2 columns:", df2.columns)
1637
+
1638
+ # Drop all-NA columns, if any
1639
+ df1_clean = df1.dropna(axis=1, how='all')
1640
+ df2_clean = df2.dropna(axis=1, how='all')
1641
+
1642
+ # Inspect resulting columns after dropping all-NA columns
1643
+ print("Cleaned df1 columns:", df1_clean.columns)
1644
+ print("Cleaned df2 columns:", df2_clean.columns)
1645
+
1646
+ # Ensure they still have the same columns after dropping all-NA columns
1647
+ if set(df1_clean.columns) != set(df2_clean.columns):
1648
+ raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1649
+
1650
+ result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
1635
1651
  return result_df
1636
1652
 
1637
1653
 
1654
+
1638
1655
  def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1639
1656
  """
1640
1657
  Perform a bag union join, concatenating the two DataFrames without dropping duplicates.
@@ -1652,7 +1669,15 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1652
1669
  if set(df1.columns) != set(df2.columns):
1653
1670
  raise ValueError("Both DataFrames must have the same columns for a bag union join")
1654
1671
 
1655
- result_df = pd.concat([df1, df2], ignore_index=True)
1672
+ # Drop all-NA columns, if any
1673
+ df1_clean = df1.dropna(axis=1, how='all')
1674
+ df2_clean = df2.dropna(axis=1, how='all')
1675
+
1676
+ # Ensure they still have the same columns after dropping all-NA columns
1677
+ if set(df1_clean.columns) != set(df2_clean.columns):
1678
+ raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1679
+
1680
+ result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
1656
1681
  return result_df
1657
1682
 
1658
1683
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.9
3
+ Version: 0.0.11
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
File without changes
File without changes