rgwfuncs 0.0.8__tar.gz → 0.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1151,6 +1151,30 @@ Perform a right join on two DataFrames.
1151
1151
 
1152
1152
  --------------------------------------------------------------------------------
1153
1153
 
1154
+ ### 45. `sync_dataframe_to_sqlite_database`
1155
+ Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.
1156
+
1157
+ • Parameters:
1158
+ - `db_path` (str): Path to the SQLite database file.
1159
+ - `tablename` (str): The name of the table in the database.
1160
+ - `df` (pd.DataFrame): The DataFrame to be processed and saved.
1161
+
1162
+ • Returns:
1163
+ - None
1164
+
1165
+ • Example:
1166
+
1167
+ from rgwfuncs import sync_dataframe_to_sqlite_database
1168
+ import pandas as pd
1169
+
1170
+ df = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1171
+ db_path = 'my_database.db'
1172
+ tablename = 'my_table'
1173
+
1174
+ sync_dataframe_to_sqlite_database(db_path, tablename, df)
1175
+
1176
+ --------------------------------------------------------------------------------
1177
+
1154
1178
  ## Additional Info
1155
1179
 
1156
1180
  For more information, refer to each function’s docstring by calling:
@@ -1125,6 +1125,30 @@ Perform a right join on two DataFrames.
1125
1125
 
1126
1126
  --------------------------------------------------------------------------------
1127
1127
 
1128
+ ### 45. `sync_dataframe_to_sqlite_database`
1129
+ Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.
1130
+
1131
+ • Parameters:
1132
+ - `db_path` (str): Path to the SQLite database file.
1133
+ - `tablename` (str): The name of the table in the database.
1134
+ - `df` (pd.DataFrame): The DataFrame to be processed and saved.
1135
+
1136
+ • Returns:
1137
+ - None
1138
+
1139
+ • Example:
1140
+
1141
+ from rgwfuncs import sync_dataframe_to_sqlite_database
1142
+ import pandas as pd
1143
+
1144
+ df = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1145
+ db_path = 'my_database.db'
1146
+ tablename = 'my_table'
1147
+
1148
+ sync_dataframe_to_sqlite_database(db_path, tablename, df)
1149
+
1150
+ --------------------------------------------------------------------------------
1151
+
1128
1152
  ## Additional Info
1129
1153
 
1130
1154
  For more information, refer to each function’s docstring by calling:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rgwfuncs"
7
- version = "0.0.8"
7
+ version = "0.0.10"
8
8
  authors = [
9
9
  { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = rgwfuncs
3
- version = 0.0.8
3
+ version = 0.0.10
4
4
  author = Ryan Gerard Wilson
5
5
  author_email = ryangerardwilson@gmail.com
6
6
  description = A functional programming paradigm for mathematical modelling and data science
@@ -1,4 +1,4 @@
1
1
  # This file is automatically generated
2
2
  # Dynamically importing functions from modules
3
3
 
4
- from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, top_n_unique_values, union_join, update_rows
4
+ from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
@@ -1631,7 +1631,15 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1631
1631
  if set(df1.columns) != set(df2.columns):
1632
1632
  raise ValueError("Both DataFrames must have the same columns for a union join")
1633
1633
 
1634
- result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
1634
+ # Drop all-NA columns, if any
1635
+ df1_clean = df1.dropna(axis=1, how='all')
1636
+ df2_clean = df2.dropna(axis=1, how='all')
1637
+
1638
+ # Ensure they still have the same columns after dropping all-NA columns
1639
+ if set(df1_clean.columns) != set(df2_clean.columns):
1640
+ raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1641
+
1642
+ result_df = pd.concat([df1_clean, df2_clean], ignore_index=True).drop_duplicates()
1635
1643
  return result_df
1636
1644
 
1637
1645
 
@@ -1652,7 +1660,15 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
1652
1660
  if set(df1.columns) != set(df2.columns):
1653
1661
  raise ValueError("Both DataFrames must have the same columns for a bag union join")
1654
1662
 
1655
- result_df = pd.concat([df1, df2], ignore_index=True)
1663
+ # Drop all-NA columns, if any
1664
+ df1_clean = df1.dropna(axis=1, how='all')
1665
+ df2_clean = df2.dropna(axis=1, how='all')
1666
+
1667
+ # Ensure they still have the same columns after dropping all-NA columns
1668
+ if set(df1_clean.columns) != set(df2_clean.columns):
1669
+ raise ValueError("Both DataFrames must have the same columns after dropping all-NA columns")
1670
+
1671
+ result_df = pd.concat([df1_clean, df2_clean], ignore_index=True)
1656
1672
  return result_df
1657
1673
 
1658
1674
 
@@ -1686,3 +1702,60 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
1686
1702
  A new DataFrame as the result of a right join.
1687
1703
  """
1688
1704
  return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
1705
+
1706
def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
    """
    Processes and saves a DataFrame to an SQLite database, adding a timestamp column
    and replacing the existing table if needed. Creates the table if it does not exist.

    The data is first written to a staging table named '{tablename}_new'. Only when
    that staging table contains at least one row is the existing '{tablename}' table
    dropped and the staging table renamed into its place. An empty DataFrame
    therefore leaves any existing '{tablename}' table untouched, and the empty
    staging table is removed again.

    The caller's DataFrame is not modified: the 'rgwfuncs_sync_timestamp' column
    (local time, formatted as 'YYYY-MM-DD HH:MM:SS') is added to a copy.

    Parameters:
        - db_path (str): Path to the SQLite database file.
        - tablename (str): The name of the table in the database.
        - df (pd.DataFrame): The DataFrame to be processed and saved.

    Returns:
        - None
    """
    # Imported locally so the function does not depend on the module's import block.
    from contextlib import closing

    def quote_identifier(name: str) -> str:
        # Double-quote an SQLite identifier, doubling any embedded quotes, so table
        # names containing spaces, hyphens or keywords are handled safely.
        return '"' + name.replace('"', '""') + '"'

    # Step 1: Add the timestamp column on a copy; `assign` returns a new DataFrame,
    # so the caller's object keeps its original columns.
    stamped_df = df.assign(
        rgwfuncs_sync_timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    )

    staging_name = f"{tablename}_new"
    quoted_staging = quote_identifier(staging_name)
    quoted_target = quote_identifier(tablename)

    # `with conn:` only commits or rolls back the transaction; `closing` is what
    # actually releases the connection (and the file handle) when we are done.
    with closing(sqlite3.connect(db_path)) as conn:
        with conn:
            # Step 2: Save the data as '{tablename}_new'. `if_exists='replace'` makes
            # pandas drop and recreate the staging table itself, so no explicit
            # CREATE TABLE is needed and a stale staging table is overwritten.
            stamped_df.to_sql(staging_name, conn, if_exists='replace', index=False)

            # Step 3: Swap the staging table in only if it actually holds data.
            row_count = conn.execute(
                f"SELECT COUNT(*) FROM {quoted_staging}"
            ).fetchone()[0]

            if row_count > 0:
                # Drop the old table if it exists, then promote the staging table.
                conn.execute(f"DROP TABLE IF EXISTS {quoted_target}")
                conn.execute(f"ALTER TABLE {quoted_staging} RENAME TO {quoted_target}")
            else:
                # Nothing to publish: keep the existing table and remove the
                # empty staging table instead of leaving it behind.
                conn.execute(f"DROP TABLE IF EXISTS {quoted_staging}")
1760
+
1761
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -1151,6 +1151,30 @@ Perform a right join on two DataFrames.
1151
1151
 
1152
1152
  --------------------------------------------------------------------------------
1153
1153
 
1154
+ ### 45. `sync_dataframe_to_sqlite_database`
1155
+ Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.
1156
+
1157
+ • Parameters:
1158
+ - `db_path` (str): Path to the SQLite database file.
1159
+ - `tablename` (str): The name of the table in the database.
1160
+ - `df` (pd.DataFrame): The DataFrame to be processed and saved.
1161
+
1162
+ • Returns:
1163
+ - None
1164
+
1165
+ • Example:
1166
+
1167
+ from rgwfuncs import sync_dataframe_to_sqlite_database
1168
+ import pandas as pd
1169
+
1170
+ df = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1171
+ db_path = 'my_database.db'
1172
+ tablename = 'my_table'
1173
+
1174
+ sync_dataframe_to_sqlite_database(db_path, tablename, df)
1175
+
1176
+ --------------------------------------------------------------------------------
1177
+
1154
1178
  ## Additional Info
1155
1179
 
1156
1180
  For more information, refer to each function’s docstring by calling:
File without changes