rgwfuncs 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/df_lib.py CHANGED
@@ -61,6 +61,7 @@ def docs(method_type_filter: Optional[str] = None) -> None:
61
61
  # Print the entire docstring for the matching function
62
62
  print(f"\n{name}:\n{docstring}")
63
63
 
64
+
64
65
  def numeric_clean(df: pd.DataFrame, column_names: str, column_type: str, irregular_value_treatment: str) -> pd.DataFrame:
65
66
  """
66
67
  Cleans the numeric columns based on specified treatments.
@@ -1628,28 +1629,29 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1628
1629
  Raises:
1629
1630
  ValueError: If the DataFrames do not have the same columns.
1630
1631
  """
1631
- if set(df1.columns) != set(df2.columns):
1632
- raise ValueError("Both DataFrames must have the same columns for a union join")
1633
-
1634
1632
  # Inspect initial columns
1635
- print("Initial df1 columns:", df1.columns)
1636
- print("Initial df2 columns:", df2.columns)
1633
+ # print("Initial df1 columns:", df1.columns)
1634
+ # print("Initial df2 columns:", df2.columns)
1637
1635
 
1638
- # Drop all-NA columns, if any
1639
- df1_clean = df1.dropna(axis=1, how='all')
1640
- df2_clean = df2.dropna(axis=1, how='all')
1636
+ # Standardize columns by adding missing columns filled with NaN
1637
+ for col in df2.columns:
1638
+ if col not in df1:
1639
+ df1[col] = pd.NA
1641
1640
 
1642
- # Inspect resulting columns after dropping all-NA columns
1643
- print("Cleaned df1 columns:", df1_clean.columns)
1644
- print("Cleaned df2 columns:", df2_clean.columns)
1641
+ for col in df1.columns:
1642
+ if col not in df2:
1643
+ df2[col] = pd.NA
1645
1644
 
1646
- # Ensure they still have the same columns after dropping all-NA columns
1647
- if set(df1_clean.columns) != set(df2_clean.columns):
1648
- raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1645
+ # print("Standardized df1 columns:", df1.columns)
1646
+ # print("Standardized df2 columns:", df2.columns)
1649
1647
 
1650
- result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
1651
- return result_df
1648
+ # Check if columns match now
1649
+ if set(df1.columns) != set(df2.columns):
1650
+ raise ValueError("Both DataFrames must have the same columns after standardizing columns")
1652
1651
 
1652
+ # Concatenate and drop duplicates
1653
+ result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1654
+ return result_df
1653
1655
 
1654
1656
 
1655
1657
  def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
@@ -1666,18 +1668,28 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1666
1668
  Raises:
1667
1669
  ValueError: If the DataFrames do not have the same columns.
1668
1670
  """
1669
- if set(df1.columns) != set(df2.columns):
1670
- raise ValueError("Both DataFrames must have the same columns for a bag union join")
1671
+ # Inspect initial columns
1672
+ # print("Initial df1 columns:", df1.columns)
1673
+ # print("Initial df2 columns:", df2.columns)
1674
+
1675
+ # Standardize columns by adding missing columns filled with NaN
1676
+ for col in df2.columns:
1677
+ if col not in df1:
1678
+ df1[col] = pd.NA
1671
1679
 
1672
- # Drop all-NA columns, if any
1673
- df1_clean = df1.dropna(axis=1, how='all')
1674
- df2_clean = df2.dropna(axis=1, how='all')
1680
+ for col in df1.columns:
1681
+ if col not in df2:
1682
+ df2[col] = pd.NA
1675
1683
 
1676
- # Ensure they still have the same columns after dropping all-NA columns
1677
- if set(df1_clean.columns) != set(df2_clean.columns):
1678
- raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1684
+ # print("Standardized df1 columns:", df1.columns)
1685
+ # print("Standardized df2 columns:", df2.columns)
1686
+
1687
+ # Ensure they have the same columns after standardizing
1688
+ if set(df1.columns) != set(df2.columns):
1689
+ raise ValueError("Both DataFrames must have the same columns after standardizing columns")
1679
1690
 
1680
- result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
1691
+ # Concatenate without dropping duplicates
1692
+ result_df = pd.concat([df1, df2], ignore_index=True)
1681
1693
  return result_df
1682
1694
 
1683
1695
 
@@ -1712,6 +1724,7 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
1712
1724
  """
1713
1725
  return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
1714
1726
 
1727
+
1715
1728
  def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
1716
1729
  """
1717
1730
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column
@@ -1766,5 +1779,3 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
1766
1779
  conn.execute(f"DROP TABLE IF EXISTS {tablename}")
1767
1780
  # Rename the new table to the old table name
1768
1781
  conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
1769
-
1770
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.11
3
+ Version: 0.0.13
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -0,0 +1,8 @@
1
+ rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
2
+ rgwfuncs/df_lib.py,sha256=2JU1Z0wjBrpcmLuJgY6-Bi1OFXHyNk1XeRGESkDI934,64077
3
+ rgwfuncs-0.0.13.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
+ rgwfuncs-0.0.13.dist-info/METADATA,sha256=TZoSn8ANcHG2IOAW1SwNbu3ZnPKxdWbKzoCAXLTB06w,32059
5
+ rgwfuncs-0.0.13.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ rgwfuncs-0.0.13.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
+ rgwfuncs-0.0.13.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
+ rgwfuncs-0.0.13.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
2
- rgwfuncs/df_lib.py,sha256=N5XCe9LYMIVjh0pg2Xc1i3Md--mdNJyJ5t7NpO9x4-s,63973
3
- rgwfuncs-0.0.11.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
- rgwfuncs-0.0.11.dist-info/METADATA,sha256=mqOIwsthIDAdpgtcFt35vW_xc99f2xtu5O_k4_Y-wQY,32059
5
- rgwfuncs-0.0.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
- rgwfuncs-0.0.11.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
- rgwfuncs-0.0.11.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
- rgwfuncs-0.0.11.dist-info/RECORD,,