rgwfuncs 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +15 -17
- {rgwfuncs-0.0.13.dist-info → rgwfuncs-0.0.14.dist-info}/METADATA +1 -1
- rgwfuncs-0.0.14.dist-info/RECORD +8 -0
- rgwfuncs-0.0.13.dist-info/RECORD +0 -8
- {rgwfuncs-0.0.13.dist-info → rgwfuncs-0.0.14.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.13.dist-info → rgwfuncs-0.0.14.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.13.dist-info → rgwfuncs-0.0.14.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.13.dist-info → rgwfuncs-0.0.14.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -1614,7 +1614,6 @@ def mask_against_dataframe_converse(df: pd.DataFrame, other_df: pd.DataFrame, co
|
|
1614
1614
|
|
1615
1615
|
return df[~df[column_name].isin(other_df[column_name])]
|
1616
1616
|
|
1617
|
-
|
1618
1617
|
def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
1619
1618
|
"""
|
1620
1619
|
Perform a union join, concatenating the two DataFrames and dropping duplicates.
|
@@ -1630,22 +1629,22 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1630
1629
|
ValueError: If the DataFrames do not have the same columns.
|
1631
1630
|
"""
|
1632
1631
|
# Inspect initial columns
|
1633
|
-
|
1634
|
-
|
1632
|
+
print("Initial df1 columns:", df1.columns)
|
1633
|
+
print("Initial df2 columns:", df2.columns)
|
1635
1634
|
|
1636
|
-
# Standardize columns by adding missing columns filled with
|
1635
|
+
# Standardize columns by adding missing columns filled with empty strings
|
1637
1636
|
for col in df2.columns:
|
1638
1637
|
if col not in df1:
|
1639
|
-
df1[col] =
|
1638
|
+
df1[col] = ""
|
1640
1639
|
|
1641
1640
|
for col in df1.columns:
|
1642
1641
|
if col not in df2:
|
1643
|
-
df2[col] =
|
1642
|
+
df2[col] = ""
|
1644
1643
|
|
1645
|
-
|
1646
|
-
|
1644
|
+
print("Standardized df1 columns:", df1.columns)
|
1645
|
+
print("Standardized df2 columns:", df2.columns)
|
1647
1646
|
|
1648
|
-
#
|
1647
|
+
# Ensure they have the same columns after standardizing
|
1649
1648
|
if set(df1.columns) != set(df2.columns):
|
1650
1649
|
raise ValueError("Both DataFrames must have the same columns after standardizing columns")
|
1651
1650
|
|
@@ -1653,7 +1652,6 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1653
1652
|
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1654
1653
|
return result_df
|
1655
1654
|
|
1656
|
-
|
1657
1655
|
def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
1658
1656
|
"""
|
1659
1657
|
Perform a bag union join, concatenating the two DataFrames without dropping duplicates.
|
@@ -1669,20 +1667,20 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1669
1667
|
ValueError: If the DataFrames do not have the same columns.
|
1670
1668
|
"""
|
1671
1669
|
# Inspect initial columns
|
1672
|
-
|
1673
|
-
|
1670
|
+
print("Initial df1 columns:", df1.columns)
|
1671
|
+
print("Initial df2 columns:", df2.columns)
|
1674
1672
|
|
1675
|
-
# Standardize columns by adding missing columns filled with
|
1673
|
+
# Standardize columns by adding missing columns filled with empty strings
|
1676
1674
|
for col in df2.columns:
|
1677
1675
|
if col not in df1:
|
1678
|
-
df1[col] =
|
1676
|
+
df1[col] = ""
|
1679
1677
|
|
1680
1678
|
for col in df1.columns:
|
1681
1679
|
if col not in df2:
|
1682
|
-
df2[col] =
|
1680
|
+
df2[col] = ""
|
1683
1681
|
|
1684
|
-
|
1685
|
-
|
1682
|
+
print("Standardized df1 columns:", df1.columns)
|
1683
|
+
print("Standardized df2 columns:", df2.columns)
|
1686
1684
|
|
1687
1685
|
# Ensure they have the same columns after standardizing
|
1688
1686
|
if set(df1.columns) != set(df2.columns):
|
@@ -0,0 +1,8 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
|
2
|
+
rgwfuncs/df_lib.py,sha256=GlFATJvcvLxQK-twfmcBIMJk_1-cq4HCuiODqyoZN78,64094
|
3
|
+
rgwfuncs-0.0.14.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
+
rgwfuncs-0.0.14.dist-info/METADATA,sha256=QSQo_msYCNSoln73NBfxfgvudh4LmTwVwzvM2JHc114,32059
|
5
|
+
rgwfuncs-0.0.14.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
+
rgwfuncs-0.0.14.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
+
rgwfuncs-0.0.14.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
+
rgwfuncs-0.0.14.dist-info/RECORD,,
|
rgwfuncs-0.0.13.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
|
2
|
-
rgwfuncs/df_lib.py,sha256=2JU1Z0wjBrpcmLuJgY6-Bi1OFXHyNk1XeRGESkDI934,64077
|
3
|
-
rgwfuncs-0.0.13.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
-
rgwfuncs-0.0.13.dist-info/METADATA,sha256=TZoSn8ANcHG2IOAW1SwNbu3ZnPKxdWbKzoCAXLTB06w,32059
|
5
|
-
rgwfuncs-0.0.13.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
-
rgwfuncs-0.0.13.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
-
rgwfuncs-0.0.13.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
-
rgwfuncs-0.0.13.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|