rgwfuncs 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rgwfuncs/df_lib.py +7 -38
- {rgwfuncs-0.0.14.dist-info → rgwfuncs-0.0.16.dist-info}/METADATA +1 -1
- rgwfuncs-0.0.16.dist-info/RECORD +8 -0
- rgwfuncs-0.0.14.dist-info/RECORD +0 -8
- {rgwfuncs-0.0.14.dist-info → rgwfuncs-0.0.16.dist-info}/LICENSE +0 -0
- {rgwfuncs-0.0.14.dist-info → rgwfuncs-0.0.16.dist-info}/WHEEL +0 -0
- {rgwfuncs-0.0.14.dist-info → rgwfuncs-0.0.16.dist-info}/entry_points.txt +0 -0
- {rgwfuncs-0.0.14.dist-info → rgwfuncs-0.0.16.dist-info}/top_level.txt +0 -0
rgwfuncs/df_lib.py
CHANGED
@@ -23,7 +23,10 @@ from googleapiclient.discovery import build
|
|
23
23
|
import base64
|
24
24
|
import inspect
|
25
25
|
from typing import Optional, Callable, Dict, List, Tuple, Any
|
26
|
+
import warnings
|
26
27
|
|
28
|
+
# Suppress all FutureWarnings
|
29
|
+
warnings.filterwarnings("ignore", category=FutureWarning)
|
27
30
|
|
28
31
|
def docs(method_type_filter: Optional[str] = None) -> None:
|
29
32
|
"""
|
@@ -1614,6 +1617,7 @@ def mask_against_dataframe_converse(df: pd.DataFrame, other_df: pd.DataFrame, co
|
|
1614
1617
|
|
1615
1618
|
return df[~df[column_name].isin(other_df[column_name])]
|
1616
1619
|
|
1620
|
+
|
1617
1621
|
def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
1618
1622
|
"""
|
1619
1623
|
Perform a union join, concatenating the two DataFrames and dropping duplicates.
|
@@ -1628,30 +1632,13 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1628
1632
|
Raises:
|
1629
1633
|
ValueError: If the DataFrames do not have the same columns.
|
1630
1634
|
"""
|
1631
|
-
# Inspect initial columns
|
1632
|
-
print("Initial df1 columns:", df1.columns)
|
1633
|
-
print("Initial df2 columns:", df2.columns)
|
1634
|
-
|
1635
|
-
# Standardize columns by adding missing columns filled with empty strings
|
1636
|
-
for col in df2.columns:
|
1637
|
-
if col not in df1:
|
1638
|
-
df1[col] = ""
|
1639
|
-
|
1640
|
-
for col in df1.columns:
|
1641
|
-
if col not in df2:
|
1642
|
-
df2[col] = ""
|
1643
|
-
|
1644
|
-
print("Standardized df1 columns:", df1.columns)
|
1645
|
-
print("Standardized df2 columns:", df2.columns)
|
1646
|
-
|
1647
|
-
# Ensure they have the same columns after standardizing
|
1648
1635
|
if set(df1.columns) != set(df2.columns):
|
1649
|
-
raise ValueError("Both DataFrames must have the same columns
|
1636
|
+
raise ValueError("Both DataFrames must have the same columns for a union join")
|
1650
1637
|
|
1651
|
-
# Concatenate and drop duplicates
|
1652
1638
|
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1653
1639
|
return result_df
|
1654
1640
|
|
1641
|
+
|
1655
1642
|
def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
1656
1643
|
"""
|
1657
1644
|
Perform a bag union join, concatenating the two DataFrames without dropping duplicates.
|
@@ -1666,27 +1653,9 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1666
1653
|
Raises:
|
1667
1654
|
ValueError: If the DataFrames do not have the same columns.
|
1668
1655
|
"""
|
1669
|
-
# Inspect initial columns
|
1670
|
-
print("Initial df1 columns:", df1.columns)
|
1671
|
-
print("Initial df2 columns:", df2.columns)
|
1672
|
-
|
1673
|
-
# Standardize columns by adding missing columns filled with empty strings
|
1674
|
-
for col in df2.columns:
|
1675
|
-
if col not in df1:
|
1676
|
-
df1[col] = ""
|
1677
|
-
|
1678
|
-
for col in df1.columns:
|
1679
|
-
if col not in df2:
|
1680
|
-
df2[col] = ""
|
1681
|
-
|
1682
|
-
print("Standardized df1 columns:", df1.columns)
|
1683
|
-
print("Standardized df2 columns:", df2.columns)
|
1684
|
-
|
1685
|
-
# Ensure they have the same columns after standardizing
|
1686
1656
|
if set(df1.columns) != set(df2.columns):
|
1687
|
-
raise ValueError("Both DataFrames must have the same columns
|
1657
|
+
raise ValueError("Both DataFrames must have the same columns for a bag union join")
|
1688
1658
|
|
1689
|
-
# Concatenate without dropping duplicates
|
1690
1659
|
result_df = pd.concat([df1, df2], ignore_index=True)
|
1691
1660
|
return result_df
|
1692
1661
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
|
2
|
+
rgwfuncs/df_lib.py,sha256=OZPI7M35mbue6YsieWmlzjM5RUkaow0v0d3P-V71L6o,63034
|
3
|
+
rgwfuncs-0.0.16.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
+
rgwfuncs-0.0.16.dist-info/METADATA,sha256=oKTScVPzrgTTWdCQ7vxEdKYRnc-S_90hKwefifayeDU,32059
|
5
|
+
rgwfuncs-0.0.16.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
+
rgwfuncs-0.0.16.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
+
rgwfuncs-0.0.16.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
+
rgwfuncs-0.0.16.dist-info/RECORD,,
|
rgwfuncs-0.0.14.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
|
2
|
-
rgwfuncs/df_lib.py,sha256=GlFATJvcvLxQK-twfmcBIMJk_1-cq4HCuiODqyoZN78,64094
|
3
|
-
rgwfuncs-0.0.14.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
|
4
|
-
rgwfuncs-0.0.14.dist-info/METADATA,sha256=QSQo_msYCNSoln73NBfxfgvudh4LmTwVwzvM2JHc114,32059
|
5
|
-
rgwfuncs-0.0.14.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
6
|
-
rgwfuncs-0.0.14.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
|
7
|
-
rgwfuncs-0.0.14.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
|
8
|
-
rgwfuncs-0.0.14.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|