rgwfuncs 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +34 -24
- {rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/METADATA +1 -1
- rgwfuncs-0.0.12.dist-info/RECORD +8 -0
- rgwfuncs-0.0.11.dist-info/RECORD +0 -8
- {rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.11.dist-info → rgwfuncs-0.0.12.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -1628,29 +1628,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1628
1628
|
Raises:
|
1629
1629
|
ValueError: If the DataFrames do not have the same columns.
|
1630
1630
|
"""
|
1631
|
-
if set(df1.columns) != set(df2.columns):
|
1632
|
-
raise ValueError("Both DataFrames must have the same columns for a union join")
|
1633
|
-
|
1634
1631
|
# Inspect initial columns
|
1635
1632
|
print("Initial df1 columns:", df1.columns)
|
1636
1633
|
print("Initial df2 columns:", df2.columns)
|
1637
1634
|
|
1638
|
-
#
|
1639
|
-
|
1640
|
-
|
1635
|
+
# Standardize columns by adding missing columns filled with NaN
|
1636
|
+
for col in df2.columns:
|
1637
|
+
if col not in df1:
|
1638
|
+
df1[col] = pd.NA
|
1641
1639
|
|
1642
|
-
|
1643
|
-
|
1644
|
-
|
1640
|
+
for col in df1.columns:
|
1641
|
+
if col not in df2:
|
1642
|
+
df2[col] = pd.NA
|
1645
1643
|
|
1646
|
-
|
1647
|
-
|
1648
|
-
raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
|
1649
|
-
|
1650
|
-
result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
|
1651
|
-
return result_df
|
1644
|
+
print("Standardized df1 columns:", df1.columns)
|
1645
|
+
print("Standardized df2 columns:", df2.columns)
|
1652
1646
|
|
1647
|
+
# Check if columns match now
|
1648
|
+
if set(df1.columns) != set(df2.columns):
|
1649
|
+
raise ValueError("Both DataFrames must have the same columns after standardizing columns")
|
1653
1650
|
|
1651
|
+
# Concatenate and drop duplicates
|
1652
|
+
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1653
|
+
return result_df
|
1654
1654
|
|
1655
1655
|
def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
1656
1656
|
"""
|
@@ -1666,18 +1666,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1666
1666
|
Raises:
|
1667
1667
|
ValueError: If the DataFrames do not have the same columns.
|
1668
1668
|
"""
|
1669
|
-
|
1670
|
-
|
1669
|
+
# Inspect initial columns
|
1670
|
+
print("Initial df1 columns:", df1.columns)
|
1671
|
+
print("Initial df2 columns:", df2.columns)
|
1671
1672
|
|
1672
|
-
#
|
1673
|
-
|
1674
|
-
|
1673
|
+
# Standardize columns by adding missing columns filled with NaN
|
1674
|
+
for col in df2.columns:
|
1675
|
+
if col not in df1:
|
1676
|
+
df1[col] = pd.NA
|
1675
1677
|
|
1676
|
-
|
1677
|
-
|
1678
|
-
|
1678
|
+
for col in df1.columns:
|
1679
|
+
if col not in df2:
|
1680
|
+
df2[col] = pd.NA
|
1681
|
+
|
1682
|
+
print("Standardized df1 columns:", df1.columns)
|
1683
|
+
print("Standardized df2 columns:", df2.columns)
|
1684
|
+
|
1685
|
+
# Ensure they have the same columns after standardizing
|
1686
|
+
if set(df1.columns) != set(df2.columns):
|
1687
|
+
raise ValueError("Both DataFrames must have the same columns after standardizing columns")
|
1679
1688
|
|
1680
|
-
|
1689
|
+
# Concatenate without dropping duplicates
|
1690
|
+
result_df = pd.concat([df1, df2], ignore_index=True)
|
1681
1691
|
return result_df
|
1682
1692
|
|
1683
1693
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
|
2
|
+
rgwfuncs/df_lib.py,sha256=mIOEvnqpkkHW2Az23_09Xt61nltTtG6uNW0b81aW5Mg,64060
|
3
|
+
rgwfuncs-0.0.12.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
+
rgwfuncs-0.0.12.dist-info/METADATA,sha256=rgeOUbLIXmgb-JuqmAcdF2G2vBS7iWCdsop74rfuTfc,32059
|
5
|
+
rgwfuncs-0.0.12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
+
rgwfuncs-0.0.12.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
+
rgwfuncs-0.0.12.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
+
rgwfuncs-0.0.12.dist-info/RECORD,,
|
rgwfuncs-0.0.11.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
|
2
|
-
rgwfuncs/df_lib.py,sha256=N5XCe9LYMIVjh0pg2Xc1i3Md--mdNJyJ5t7NpO9x4-s,63973
|
3
|
-
rgwfuncs-0.0.11.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
-
rgwfuncs-0.0.11.dist-info/METADATA,sha256=mqOIwsthIDAdpgtcFt35vW_xc99f2xtu5O_k4_Y-wQY,32059
|
5
|
-
rgwfuncs-0.0.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
-
rgwfuncs-0.0.11.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
-
rgwfuncs-0.0.11.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
-
rgwfuncs-0.0.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|