rgwfuncs 0.0.11__tar.gz → 0.0.13__tar.gz
This diff compares the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {rgwfuncs-0.0.11/src/rgwfuncs.egg-info → rgwfuncs-0.0.13}/PKG-INFO +1 -1
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/pyproject.toml +1 -1
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/setup.cfg +1 -1
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/src/rgwfuncs/df_lib.py +38 -27
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13/src/rgwfuncs.egg-info}/PKG-INFO +1 -1
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/LICENSE +0 -0
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/README.md +0 -0
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/src/rgwfuncs/__init__.py +0 -0
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/src/rgwfuncs.egg-info/SOURCES.txt +0 -0
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/src/rgwfuncs.egg-info/dependency_links.txt +0 -0
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/src/rgwfuncs.egg-info/entry_points.txt +0 -0
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/src/rgwfuncs.egg-info/requires.txt +0 -0
- {rgwfuncs-0.0.11 → rgwfuncs-0.0.13}/src/rgwfuncs.egg-info/top_level.txt +0 -0
@@ -61,6 +61,7 @@ def docs(method_type_filter: Optional[str] = None) -> None:
|
|
61
61
|
# Print the entire docstring for the matching function
|
62
62
|
print(f"\n{name}:\n{docstring}")
|
63
63
|
|
64
|
+
|
64
65
|
def numeric_clean(df: pd.DataFrame, column_names: str, column_type: str, irregular_value_treatment: str) -> pd.DataFrame:
|
65
66
|
"""
|
66
67
|
Cleans the numeric columns based on specified treatments.
|
@@ -1628,28 +1629,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1628
1629
|
Raises:
|
1629
1630
|
ValueError: If the DataFrames do not have the same columns.
|
1630
1631
|
"""
|
1631
|
-
if set(df1.columns) != set(df2.columns):
|
1632
|
-
raise ValueError("Both DataFrames must have the same columns for a union join")
|
1633
|
-
|
1634
1632
|
# Inspect initial columns
|
1635
|
-
print("Initial df1 columns:", df1.columns)
|
1636
|
-
print("Initial df2 columns:", df2.columns)
|
1633
|
+
# print("Initial df1 columns:", df1.columns)
|
1634
|
+
# print("Initial df2 columns:", df2.columns)
|
1637
1635
|
|
1638
|
-
#
|
1639
|
-
|
1640
|
-
|
1636
|
+
# Standardize columns by adding missing columns filled with NaN
|
1637
|
+
for col in df2.columns:
|
1638
|
+
if col not in df1:
|
1639
|
+
df1[col] = pd.NA
|
1641
1640
|
|
1642
|
-
|
1643
|
-
|
1644
|
-
|
1641
|
+
for col in df1.columns:
|
1642
|
+
if col not in df2:
|
1643
|
+
df2[col] = pd.NA
|
1645
1644
|
|
1646
|
-
#
|
1647
|
-
|
1648
|
-
raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
|
1645
|
+
# print("Standardized df1 columns:", df1.columns)
|
1646
|
+
# print("Standardized df2 columns:", df2.columns)
|
1649
1647
|
|
1650
|
-
|
1651
|
-
|
1648
|
+
# Check if columns match now
|
1649
|
+
if set(df1.columns) != set(df2.columns):
|
1650
|
+
raise ValueError("Both DataFrames must have the same columns after standardizing columns")
|
1652
1651
|
|
1652
|
+
# Concatenate and drop duplicates
|
1653
|
+
result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
|
1654
|
+
return result_df
|
1653
1655
|
|
1654
1656
|
|
1655
1657
|
def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
@@ -1666,18 +1668,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
1666
1668
|
Raises:
|
1667
1669
|
ValueError: If the DataFrames do not have the same columns.
|
1668
1670
|
"""
|
1669
|
-
|
1670
|
-
|
1671
|
+
# Inspect initial columns
|
1672
|
+
# print("Initial df1 columns:", df1.columns)
|
1673
|
+
# print("Initial df2 columns:", df2.columns)
|
1674
|
+
|
1675
|
+
# Standardize columns by adding missing columns filled with NaN
|
1676
|
+
for col in df2.columns:
|
1677
|
+
if col not in df1:
|
1678
|
+
df1[col] = pd.NA
|
1671
1679
|
|
1672
|
-
|
1673
|
-
|
1674
|
-
|
1680
|
+
for col in df1.columns:
|
1681
|
+
if col not in df2:
|
1682
|
+
df2[col] = pd.NA
|
1675
1683
|
|
1676
|
-
#
|
1677
|
-
|
1678
|
-
|
1684
|
+
# print("Standardized df1 columns:", df1.columns)
|
1685
|
+
# print("Standardized df2 columns:", df2.columns)
|
1686
|
+
|
1687
|
+
# Ensure they have the same columns after standardizing
|
1688
|
+
if set(df1.columns) != set(df2.columns):
|
1689
|
+
raise ValueError("Both DataFrames must have the same columns after standardizing columns")
|
1679
1690
|
|
1680
|
-
|
1691
|
+
# Concatenate without dropping duplicates
|
1692
|
+
result_df = pd.concat([df1, df2], ignore_index=True)
|
1681
1693
|
return result_df
|
1682
1694
|
|
1683
1695
|
|
@@ -1712,6 +1724,7 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
|
|
1712
1724
|
"""
|
1713
1725
|
return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
|
1714
1726
|
|
1727
|
+
|
1715
1728
|
def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
|
1716
1729
|
"""
|
1717
1730
|
Processes and saves a DataFrame to an SQLite database, adding a timestamp column
|
@@ -1766,5 +1779,3 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
|
|
1766
1779
|
conn.execute(f"DROP TABLE IF EXISTS {tablename}")
|
1767
1780
|
# Rename the new table to the old table name
|
1768
1781
|
conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
|
1769
|
-
|
1770
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|