rgwfuncs 0.0.10__tar.gz → 0.0.12__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {rgwfuncs-0.0.10/src/rgwfuncs.egg-info → rgwfuncs-0.0.12}/PKG-INFO +1 -1
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/pyproject.toml +1 -1
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/setup.cfg +1 -1
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs/df_lib.py +39 -20
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12/src/rgwfuncs.egg-info}/PKG-INFO +1 -1
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/LICENSE +0 -0
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/README.md +0 -0
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs/__init__.py +0 -0
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs.egg-info/SOURCES.txt +0 -0
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs.egg-info/dependency_links.txt +0 -0
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs.egg-info/entry_points.txt +0 -0
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs.egg-info/requires.txt +0 -0
- {rgwfuncs-0.0.10 → rgwfuncs-0.0.12}/src/rgwfuncs.egg-info/top_level.txt +0 -0
@@ -1628,20 +1628,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1628
1628
|
Raises:
|
1629
1629
|
ValueError: If the DataFrames do not have the same columns.
|
1630
1630
|
"""
|
1631
|
-
|
1632
|
-
|
1631
|
+
# Inspect initial columns
|
1632
|
+
print("Initial df1 columns:", df1.columns)
|
1633
|
+
print("Initial df2 columns:", df2.columns)
|
1633
1634
|
|
1634
|
-
#
|
1635
|
-
|
1636
|
-
|
1635
|
+
# Standardize columns by adding any missing columns, filled with pd.NA
|
1636
|
+
for col in df2.columns:
|
1637
|
+
if col not in df1:
|
1638
|
+
df1[col] = pd.NA
|
1637
1639
|
|
1638
|
-
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1642
|
-
|
1643
|
-
|
1640
|
+
for col in df1.columns:
|
1641
|
+
if col not in df2:
|
1642
|
+
df2[col] = pd.NA
|
1643
|
+
|
1644
|
+
print("Standardized df1 columns:", df1.columns)
|
1645
|
+
print("Standardized df2 columns:", df2.columns)
|
1646
|
+
|
1647
|
+
# Check if columns match now
|
1648
|
+
if set(df1.columns) != set(df2.columns):
|
1649
|
+
raise ValueError("Both DataFrames must have the same columns after standardizing columns")
|
1644
1650
|
|
1651
|
+
# Concatenate and drop duplicates
|
1652
|
+
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1653
|
+
return result_df
|
1645
1654
|
|
1646
1655
|
def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
1647
1656
|
"""
|
@@ -1657,18 +1666,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1657
1666
|
Raises:
|
1658
1667
|
ValueError: If the DataFrames do not have the same columns.
|
1659
1668
|
"""
|
1660
|
-
|
1661
|
-
|
1669
|
+
# Inspect initial columns
|
1670
|
+
print("Initial df1 columns:", df1.columns)
|
1671
|
+
print("Initial df2 columns:", df2.columns)
|
1672
|
+
|
1673
|
+
# Standardize columns by adding any missing columns, filled with pd.NA
|
1674
|
+
for col in df2.columns:
|
1675
|
+
if col not in df1:
|
1676
|
+
df1[col] = pd.NA
|
1662
1677
|
|
1663
|
-
|
1664
|
-
|
1665
|
-
|
1678
|
+
for col in df1.columns:
|
1679
|
+
if col not in df2:
|
1680
|
+
df2[col] = pd.NA
|
1666
1681
|
|
1667
|
-
|
1668
|
-
|
1669
|
-
|
1682
|
+
print("Standardized df1 columns:", df1.columns)
|
1683
|
+
print("Standardized df2 columns:", df2.columns)
|
1684
|
+
|
1685
|
+
# Ensure they have the same columns after standardizing
|
1686
|
+
if set(df1.columns) != set(df2.columns):
|
1687
|
+
raise ValueError("Both DataFrames must have the same columns after standardizing columns")
|
1670
1688
|
|
1671
|
-
|
1689
|
+
# Concatenate without dropping duplicates
|
1690
|
+
result_df = pd.concat([df1, df2], ignore_index=True)
|
1672
1691
|
return result_df
|
1673
1692
|
|
1674
1693
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|