rgwfuncs 0.0.15.tar.gz → 0.0.17.tar.gz

This diff reflects the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: rgwfuncs
- Version: 0.0.15
+ Version: 0.0.17
  Summary: A functional programming paradigm for mathematical modelling and data science
  Home-page: https://github.com/ryangerardwilson/rgwfunc
  Author: Ryan Gerard Wilson
@@ -40,9 +40,9 @@ Install the package using:

  --------------------------------------------------------------------------------

- ## Create a `rgwml.config` File
+ ## Create a `.rgwfuncsrc` File

- A `rgwml.config` file (located at `vi ~/Documents/rgwml.config) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
+ A `.rgwfuncsrc` file (located at `vi ~/.rgwfuncsrc) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.

  {
  "db_presets" : [
@@ -381,28 +381,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
  --------------------------------------------------------------------------------

  ### 12. `load_data_from_query`
+
  Load data from a database query into a DataFrame based on a configuration preset.

- Parameters:
- - `db_preset_name` (str): Name of the database preset in the config file.
- - query (str): The SQL query to execute.
- - `config_file_name` (str): Name of the configuration file (default: "rgwml.config").
+ - **Parameters:**
+ - `db_preset_name` (str): Name of the database preset in the configuration file.
+ - `query` (str): The SQL query to execute.

- Returns:
- - pd.DataFrame: A DataFrame containing the query result.
+ - **Returns:**
+ - `pd.DataFrame`: A DataFrame containing the query result.

- Example:
-
- from rgwfuncs import load_data_from_query
+ - **Notes:**
+ - The configuration file is assumed to be located at `~/.rgwfuncsrc`.

- df = load_data_from_query(
- db_preset_name="MyDBPreset",
- query="SELECT * FROM my_table",
- config_file_name="rgwml.config"
- )
- print(df)
-
+ - **Example:**
+
+ from rgwfuncs import load_data_from_query
+
+ df = load_data_from_query(
+ db_preset_name="MyDBPreset",
+ query="SELECT * FROM my_table"
+ )
+ print(df)

+
  --------------------------------------------------------------------------------

  ### 13. `load_data_from_path`
@@ -1148,10 +1150,47 @@ Perform a right join on two DataFrames.
  df_right_join = right_join(df1, df2, 'ID', 'ID')
  print(df_right_join)

+ --------------------------------------------------------------------------------
+
+ ### 45. `insert_dataframe_in_sqlite_database`
+
+ Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
+
+ - **Parameters:**
+ - `db_path` (str): The path to the SQLite database file. If the database does not exist, it will be created.
+ - `tablename` (str): The name of the table in the database. If the table does not exist, it is created with the DataFrame's columns and data types.
+ - `df` (pd.DataFrame): The DataFrame containing the data to be inserted into the database table.
+
+ - **Returns:**
+ - `None`
+
+ - **Notes:**
+ - Data types in the DataFrame are converted to SQLite-compatible types:
+ - `int64` is mapped to `INTEGER`
+ - `float64` is mapped to `REAL`
+ - `object` is mapped to `TEXT`
+ - `datetime64[ns]` is mapped to `TEXT` (dates are stored as text)
+ - `bool` is mapped to `INTEGER` (SQLite does not have a separate Boolean type)
+
+ - **Example:**
+
+ from rgwfuncs import insert_dataframe_in_sqlite_database
+ import pandas as pd
+
+ df = pd.DataFrame({
+ 'ID': [1, 2, 3],
+ 'Name': ['Alice', 'Bob', 'Charlie'],
+ 'Score': [88.5, 92.3, 85.0]
+ })
+
+ db_path = 'my_database.db'
+ tablename = 'students'
+
+ insert_dataframe_in_sqlite_database(db_path, tablename, df)

  --------------------------------------------------------------------------------

- ### 45. `sync_dataframe_to_sqlite_database`
+ ### 46. `sync_dataframe_to_sqlite_database`
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.

  • Parameters:
@@ -14,9 +14,9 @@ Install the package using:

  --------------------------------------------------------------------------------

- ## Create a `rgwml.config` File
+ ## Create a `.rgwfuncsrc` File

- A `rgwml.config` file (located at `vi ~/Documents/rgwml.config) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
+ A `.rgwfuncsrc` file (located at `vi ~/.rgwfuncsrc) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.

  {
  "db_presets" : [
@@ -355,28 +355,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
  --------------------------------------------------------------------------------

  ### 12. `load_data_from_query`
+
  Load data from a database query into a DataFrame based on a configuration preset.

- Parameters:
- - `db_preset_name` (str): Name of the database preset in the config file.
- - query (str): The SQL query to execute.
- - `config_file_name` (str): Name of the configuration file (default: "rgwml.config").
+ - **Parameters:**
+ - `db_preset_name` (str): Name of the database preset in the configuration file.
+ - `query` (str): The SQL query to execute.

- Returns:
- - pd.DataFrame: A DataFrame containing the query result.
+ - **Returns:**
+ - `pd.DataFrame`: A DataFrame containing the query result.

- Example:
-
- from rgwfuncs import load_data_from_query
+ - **Notes:**
+ - The configuration file is assumed to be located at `~/.rgwfuncsrc`.

- df = load_data_from_query(
- db_preset_name="MyDBPreset",
- query="SELECT * FROM my_table",
- config_file_name="rgwml.config"
- )
- print(df)
-
+ - **Example:**
+
+ from rgwfuncs import load_data_from_query
+
+ df = load_data_from_query(
+ db_preset_name="MyDBPreset",
+ query="SELECT * FROM my_table"
+ )
+ print(df)

+
  --------------------------------------------------------------------------------

  ### 13. `load_data_from_path`
@@ -1122,10 +1124,47 @@ Perform a right join on two DataFrames.
  df_right_join = right_join(df1, df2, 'ID', 'ID')
  print(df_right_join)

+ --------------------------------------------------------------------------------
+
+ ### 45. `insert_dataframe_in_sqlite_database`
+
+ Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
+
+ - **Parameters:**
+ - `db_path` (str): The path to the SQLite database file. If the database does not exist, it will be created.
+ - `tablename` (str): The name of the table in the database. If the table does not exist, it is created with the DataFrame's columns and data types.
+ - `df` (pd.DataFrame): The DataFrame containing the data to be inserted into the database table.
+
+ - **Returns:**
+ - `None`
+
+ - **Notes:**
+ - Data types in the DataFrame are converted to SQLite-compatible types:
+ - `int64` is mapped to `INTEGER`
+ - `float64` is mapped to `REAL`
+ - `object` is mapped to `TEXT`
+ - `datetime64[ns]` is mapped to `TEXT` (dates are stored as text)
+ - `bool` is mapped to `INTEGER` (SQLite does not have a separate Boolean type)
+
+ - **Example:**
+
+ from rgwfuncs import insert_dataframe_in_sqlite_database
+ import pandas as pd
+
+ df = pd.DataFrame({
+ 'ID': [1, 2, 3],
+ 'Name': ['Alice', 'Bob', 'Charlie'],
+ 'Score': [88.5, 92.3, 85.0]
+ })
+
+ db_path = 'my_database.db'
+ tablename = 'students'
+
+ insert_dataframe_in_sqlite_database(db_path, tablename, df)

  --------------------------------------------------------------------------------

- ### 45. `sync_dataframe_to_sqlite_database`
+ ### 46. `sync_dataframe_to_sqlite_database`
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.

  • Parameters:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "rgwfuncs"
- version = "0.0.15"
+ version = "0.0.17"
  authors = [
  { name = "Ryan Gerard Wilson", email = "ryangerardwilson@gmail.com" },
  ]
@@ -1,6 +1,6 @@
  [metadata]
  name = rgwfuncs
- version = 0.0.15
+ version = 0.0.17
  author = Ryan Gerard Wilson
  author_email = ryangerardwilson@gmail.com
  description = A functional programming paradigm for mathematical modelling and data science
@@ -0,0 +1,4 @@
+ # This file is automatically generated
+ # Dynamically importing functions from modules
+
+ from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
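The regenerated `__init__.py` re-exports every `df_lib` helper at package level, so the functions documented in the README import flat from `rgwfuncs`. A minimal sketch with made-up frames:

    import pandas as pd
    from rgwfuncs import docs, union_join

    # List the available functions; the signature appears in df_lib.py below.
    docs()

    df1 = pd.DataFrame({"ID": [1, 2], "Name": ["Alice", "Bob"]})
    df2 = pd.DataFrame({"ID": [2, 3], "Name": ["Bob", "Charlie"]})

    # union_join concatenates the frames and drops duplicate rows
    # (see its definition further down in this diff).
    print(union_join(df1, df2))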
@@ -23,6 +23,10 @@ from googleapiclient.discovery import build
  import base64
  import inspect
  from typing import Optional, Callable, Dict, List, Tuple, Any
+ import warnings
+
+ # Suppress all FutureWarnings
+ warnings.filterwarnings("ignore", category=FutureWarning)


  def docs(method_type_filter: Optional[str] = None) -> None:
@@ -336,16 +340,13 @@ def drop_duplicates_retain_last(df: pd.DataFrame, columns: Optional[str] = None)
  return df.drop_duplicates(subset=columns_list, keep='last')


- def load_data_from_query(db_preset_name: str, query: str, config_file_name: str = "rgwml.config") -> pd.DataFrame:
+ def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
  """
- Load data from a database query into a DataFrame based on a configuration
- preset.
+ Load data from a database query into a DataFrame based on a configuration preset.

  Parameters:
  db_preset_name: The name of the database preset in the configuration file.
  query: The SQL query to execute.
- config_file_name: Name of the configuration file
- (default: 'rgwml.config').

  Returns:
  A DataFrame containing the query result.
@@ -355,17 +356,6 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
  ValueError: If the database preset or db_type is invalid.
  """

- def locate_config_file(filename: str = config_file_name) -> str:
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, "Desktop"), os.path.join(home_dir, "Documents"), os.path.join(home_dir, "Downloads"),]
-
- for path in search_paths:
- for root, dirs, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(
- f"{filename} not found in Desktop, Documents, or Downloads folders")
-
  def query_mssql(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
  server = db_preset['host']
  user = db_preset['username']
@@ -395,7 +385,6 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
  return pd.DataFrame(rows, columns=columns)

  def query_clickhouse(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
-
  host = db_preset['host']
  user = db_preset['username']
  password = db_preset['password']
@@ -434,8 +423,8 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str

  return pd.DataFrame(rows, columns=columns)

- # Read the configuration file to get the database preset
- config_path = locate_config_file()
+ # Assume the configuration file is located at ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  with open(config_path, 'r') as f:
  config = json.load(f)
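With `locate_config_file` gone, the function now simply expands `~/.rgwfuncsrc` and reads the preset out of `db_presets`. The selection step itself falls outside this hunk; the sketch below assumes presets are matched on a `name` field, the same way the Slack and Telegram helpers later in this diff match their bot presets:

    import json
    import os

    config_path = os.path.expanduser("~/.rgwfuncsrc")
    with open(config_path, "r") as f:
        config = json.load(f)

    # Assumed lookup, mirroring the 'name'-based matching used for bot presets.
    db_preset = next(
        (p for p in config.get("db_presets", []) if p.get("name") == "MyDBPreset"),
        None,
    )
    if db_preset is None:
        raise ValueError("No database preset named MyDBPreset")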

@@ -458,6 +447,7 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
  raise ValueError(f"Unsupported db_type: {db_type}")


+
  def load_data_from_path(file_path: str) -> pd.DataFrame:
  """
  Load data from a file into a DataFrame based on the file extension.
@@ -808,39 +798,36 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option

  Parameters:
  df: The DataFrame to send.
- bot_name: The name of the Telegram bot as specified in the configuration.
- message: Custom message to send along with the DataFrame or file.
- as_file: Boolean flag to decide whether to send the DataFrame as a file or as text.
- remove_after_send: If True, removes the file after sending.
- """
+ bot_name: The name of the Telegram bot as specified in the configuration file.
+ message: Custom message to send along with the DataFrame or file. Defaults to None.
+ as_file: Boolean flag to indicate whether the DataFrame should be sent as a file (True) or as text (False). Defaults to True.
+ remove_after_send: If True, removes the CSV file after sending. Defaults to True.

- def locate_config_file(filename: str = "rgwml.config") -> str:
- """Retrieve the configuration file path."""
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+ Raises:
+ ValueError: If the specified bot is not found or if no DataFrame is provided.
+ Exception: If the message sending fails.

- for path in search_paths:
- for root, _, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(
- f"{filename} not found in Desktop, Documents, or Downloads")
+ Notes:
+ The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+ """

  def get_config(config_path: str) -> dict:
- """Load configuration from a json file."""
+ """Load configuration from a JSON file."""
  with open(config_path, 'r') as file:
  return json.load(file)

- config_path = locate_config_file()
+ # Assume the configuration file is located at ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  config = get_config(config_path)
- bot_config = next((bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name), None)

+ bot_config = next((bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name), None)
  if not bot_config:
  raise ValueError(f"No bot found with the name {bot_name}")

  if df is None:
  raise ValueError("No DataFrame to send. Please provide a DataFrame.")

+ response = None
  if as_file:
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
  file_name = f"df_{timestamp}.csv"
@@ -859,11 +846,12 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option
  df_str = df.to_string()
  payload = {
  'chat_id': bot_config['chat_id'],
- 'text': message + "\n\n" + df_str if message else df_str,
- 'parse_mode': 'HTML'}
+ 'text': (message + "\n\n" + df_str) if message else df_str,
+ 'parse_mode': 'HTML'
+ }
  response = requests.post(f"https://api.telegram.org/bot{bot_config['bot_token']}/sendMessage", data=payload)

- if not response.ok:
+ if response and not response.ok:
  raise Exception(f"Error sending message: {response.text}")

  print("Message sent successfully.")
@@ -871,28 +859,24 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option

  def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subject: Optional[str] = None, body: Optional[str] = None, as_file: bool = True, remove_after_send: bool = True) -> None:
  """
- Send an email with optional DataFrame attachment using Gmail API via a specified preset.
+ Send an email with an optional DataFrame attachment using the Gmail API via a specified preset.

  Parameters:
  df: The DataFrame to send.
  preset_name: The configuration preset name to use for sending the email.
  to_email: The recipient email address.
- subject: Optional subject of the email.
- body: Optional message body of the email.
- as_file: Boolean flag to decide whether to send the DataFrame as a file.
- remove_after_send: If True, removes the CSV file after sending.
- """
+ subject: Optional subject of the email. Defaults to 'DataFrame CSV File' if not given.
+ body: Optional message body of the email. Defaults to 'Please find the CSV file attached.' if not given.
+ as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or embed it in the email (False). Defaults to True.
+ remove_after_send: If True, removes the CSV file after sending. Defaults to True.

- def locate_config_file(filename: str = "rgwml.config") -> str:
- """Locate config file in common user directories."""
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+ Raises:
+ ValueError: If the preset is not found in the configuration.
+ Exception: If the email preparation or sending fails.

- for path in search_paths:
- for root, _, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(f"{filename} not found in Desktop, Documents, or Downloads folders")
+ Notes:
+ The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+ """

  def get_config(config_path: str) -> dict:
  with open(config_path, 'r') as file:
@@ -901,9 +885,7 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
  except json.JSONDecodeError as e:
  raise ValueError(f"Invalid JSON format in config file: {e}")

- def authenticate_service_account(
- service_account_credentials_path: str,
- sender_email_id: str) -> Any:
+ def authenticate_service_account(service_account_credentials_path: str, sender_email_id: str) -> Any:
  credentials = service_account.Credentials.from_service_account_file(
  service_account_credentials_path,
  scopes=['https://mail.google.com/'],
@@ -911,8 +893,8 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
  )
  return build('gmail', 'v1', credentials=credentials)

- # Load configuration
- config_path = locate_config_file()
+ # Load configuration from ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  config = get_config(config_path)

  # Retrieve Gmail preset configuration
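A matching usage sketch for the updated email helper; the preset name and recipient address are illustrative:

    import pandas as pd
    from rgwfuncs import send_data_to_email

    df = pd.DataFrame({"ID": [1, 2], "Name": ["Alice", "Bob"]})
    # 'MyGmailPreset' must name a Gmail preset defined in ~/.rgwfuncsrc.
    send_data_to_email(
        df,
        preset_name="MyGmailPreset",
        to_email="recipient@example.com",
        subject="Weekly export",
        as_file=True,
    )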
@@ -980,30 +962,25 @@ def send_data_to_slack(df: pd.DataFrame, bot_name: str, message: Optional[str] =
  Parameters:
  df: The DataFrame to send.
  bot_name: The Slack bot configuration preset name.
- message: Custom message to send along with the DataFrame or file.
- as_file: Boolean flag to decide whether to send the DataFrame as a file.
- remove_after_send: If True, removes the CSV file after sending.
- """
+ message: Custom message to send along with the DataFrame or file. Defaults to None.
+ as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or as text (False). Defaults to True.
+ remove_after_send: If True, removes the CSV file after sending. Defaults to True.

- def locate_config_file(filename: str = "rgwml.config") -> str:
- """Locate config file in common user directories."""
- home_dir = os.path.expanduser("~")
- search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+ Raises:
+ ValueError: If the specified bot is not found in the configuration.
+ Exception: If the message sending fails.

- for path in search_paths:
- for root, _, files in os.walk(path):
- if filename in files:
- return os.path.join(root, filename)
- raise FileNotFoundError(
- f"{filename} not found in Desktop, Documents, or Downloads folders")
+ Notes:
+ The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+ """

  def get_config(config_path: str) -> dict:
  """Load configuration from a JSON file."""
  with open(config_path, 'r') as file:
  return json.load(file)

- # Load the Slack configuration
- config_path = locate_config_file()
+ # Load the Slack configuration from ~/.rgwfuncsrc
+ config_path = os.path.expanduser('~/.rgwfuncsrc')
  config = get_config(config_path)

  bot_config = next((bot for bot in config['slack_bot_presets'] if bot['name'] == bot_name), None)
@@ -1021,13 +998,22 @@ def send_data_to_slack(df: pd.DataFrame, bot_name: str, message: Optional[str] =

  try:
  with open(file_name, 'rb') as file:
- response = client.files_upload(channels=bot_config['channel_id'], file=file, filename=os.path.basename(file_name), title="DataFrame Upload", initial_comment=message or '')
+ response = client.files_upload(
+ channels=bot_config['channel_id'],
+ file=file,
+ filename=os.path.basename(file_name),
+ title="DataFrame Upload",
+ initial_comment=message or ''
+ )
  finally:
  if remove_after_send and os.path.exists(file_name):
  os.remove(file_name)
  else:
  df_str = df.to_string()
- response = client.chat_postMessage(channel=bot_config['channel_id'], text=(message + "\n\n" + df_str) if message else df_str)
+ response = client.chat_postMessage(
+ channel=bot_config['channel_id'],
+ text=(message + "\n\n" + df_str) if message else df_str
+ )

  # Check if the message was sent successfully
  if not response["ok"]:
@@ -1614,6 +1600,7 @@ def mask_against_dataframe_converse(df: pd.DataFrame, other_df: pd.DataFrame, co

  return df[~df[column_name].isin(other_df[column_name])]

+
  def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  """
  Perform a union join, concatenating the two DataFrames and dropping duplicates.
@@ -1628,30 +1615,13 @@ def union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  Raises:
  ValueError: If the DataFrames do not have the same columns.
  """
- # Inspect initial columns
- # print("Initial df1 columns:", df1.columns)
- # print("Initial df2 columns:", df2.columns)
-
- # Standardize columns by adding missing columns filled with empty strings
- for col in df2.columns:
- if col not in df1:
- df1[col] = ""
-
- for col in df1.columns:
- if col not in df2:
- df2[col] = ""
-
- # print("Standardized df1 columns:", df1.columns)
- # print("Standardized df2 columns:", df2.columns)
-
- # Ensure they have the same columns after standardizing
  if set(df1.columns) != set(df2.columns):
- raise ValueError("Both DataFrames must have the same columns after standardizing columns")
+ raise ValueError("Both DataFrames must have the same columns for a union join")

- # Concatenate and drop duplicates
  result_df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()
  return result_df

+
  def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  """
  Perform a bag union join, concatenating the two DataFrames without dropping duplicates.
@@ -1666,27 +1636,9 @@ def bag_union_join(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
  Raises:
  ValueError: If the DataFrames do not have the same columns.
  """
- # Inspect initial columns
- # print("Initial df1 columns:", df1.columns)
- # print("Initial df2 columns:", df2.columns)
-
- # Standardize columns by adding missing columns filled with empty strings
- for col in df2.columns:
- if col not in df1:
- df1[col] = ""
-
- for col in df1.columns:
- if col not in df2:
- df2[col] = ""
-
- # print("Standardized df1 columns:", df1.columns)
- # print("Standardized df2 columns:", df2.columns)
-
- # Ensure they have the same columns after standardizing
  if set(df1.columns) != set(df2.columns):
- raise ValueError("Both DataFrames must have the same columns after standardizing columns")
+ raise ValueError("Both DataFrames must have the same columns for a bag union join")

- # Concatenate without dropping duplicates
  result_df = pd.concat([df1, df2], ignore_index=True)
  return result_df

@@ -1723,6 +1675,65 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
  return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)


+ def insert_dataframe_in_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
+ """
+ Inserts a Pandas DataFrame into a SQLite database table.
+
+ Parameters:
+ db_path: str
+ The file path to the SQLite database. If the database does not exist,
+ it will be created.
+
+ tablename: str
+ The name of the table where the data will be inserted. If the table does
+ not exist, it will be created based on the DataFrame's columns and types.
+
+ df: pd.DataFrame
+ The DataFrame containing the data to be inserted into the database.
+
+ Functionality:
+ - Checks if the specified table exists in the database.
+ - Creates the table with appropriate column types if it doesn't exist.
+ - Inserts the DataFrame's data into the table, appending to any existing data.
+
+ Data Type Mapping:
+ - Converts Pandas data types to SQLite types: 'int64' to 'INTEGER',
+ 'float64' to 'REAL', 'object' to 'TEXT', 'datetime64[ns]' to 'TEXT',
+ and 'bool' to 'INTEGER'.
+
+ Returns:
+ None
+ """
+
+ def table_exists(cursor, table_name):
+ cursor.execute(f"SELECT count(name) FROM sqlite_master WHERE type='table' AND name='{table_name}'")
+ return cursor.fetchone()[0] == 1
+
+
+ dtype_mapping = {
+ 'int64': 'INTEGER',
+ 'float64': 'REAL',
+ 'object': 'TEXT',
+ 'datetime64[ns]': 'TEXT',
+ 'bool': 'INTEGER',
+ }
+
+ def map_dtype(dtype):
+ return dtype_mapping.get(str(dtype), 'TEXT')
+
+ with sqlite3.connect(db_path) as conn:
+ cursor = conn.cursor()
+
+ if not table_exists(cursor, tablename):
+ columns_with_types = ', '.join(
+ f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes)
+ )
+ create_table_query = f'CREATE TABLE "{tablename}" ({columns_with_types})'
+ conn.execute(create_table_query)
+
+ df.to_sql(tablename, conn, if_exists='append', index=False)
+
+
  def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
  """
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column
@@ -1733,6 +1744,10 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
  - tablename (str): The name of the table in the database.
  - df (pd.DataFrame): The DataFrame to be processed and saved.
  """
+ # Helper function to map pandas dtype to SQLite type
+ def map_dtype(dtype):
+ return dtype_mapping.get(str(dtype), 'TEXT')
+
  # Step 1: Add a timestamp column to the dataframe
  df['rgwfuncs_sync_timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

@@ -1745,10 +1760,6 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
  'bool': 'INTEGER', # SQLite does not have a separate Boolean storage class
  }

- # Helper function to map pandas dtype to SQLite type
- def map_dtype(dtype):
- return dtype_mapping.get(str(dtype), 'TEXT')
-
  # Step 2: Save df in SQLite3 db as '{tablename}_new'
  with sqlite3.connect(db_path) as conn:
  new_table_name = f"{tablename}_new"
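The README hunks above renumber `sync_dataframe_to_sqlite_database` but show no usage example; a minimal sketch based on the signature and the timestamp behaviour visible here:

    import pandas as pd
    from rgwfuncs import sync_dataframe_to_sqlite_database

    df = pd.DataFrame({"ID": [1, 2, 3], "Score": [88.5, 92.3, 85.0]})
    # Replaces (or creates) the 'scores' table and stamps every row with an
    # 'rgwfuncs_sync_timestamp' column.
    sync_dataframe_to_sqlite_database("my_database.db", "scores", df)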
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: rgwfuncs
- Version: 0.0.15
+ Version: 0.0.17
  Summary: A functional programming paradigm for mathematical modelling and data science
  Home-page: https://github.com/ryangerardwilson/rgwfunc
  Author: Ryan Gerard Wilson
@@ -40,9 +40,9 @@ Install the package using:

  --------------------------------------------------------------------------------

- ## Create a `rgwml.config` File
+ ## Create a `.rgwfuncsrc` File

- A `rgwml.config` file (located at `vi ~/Documents/rgwml.config) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
+ A `.rgwfuncsrc` file (located at `vi ~/.rgwfuncsrc) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.

  {
  "db_presets" : [
@@ -381,28 +381,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
  --------------------------------------------------------------------------------

  ### 12. `load_data_from_query`
+
  Load data from a database query into a DataFrame based on a configuration preset.

- Parameters:
- - `db_preset_name` (str): Name of the database preset in the config file.
- - query (str): The SQL query to execute.
- - `config_file_name` (str): Name of the configuration file (default: "rgwml.config").
+ - **Parameters:**
+ - `db_preset_name` (str): Name of the database preset in the configuration file.
+ - `query` (str): The SQL query to execute.

- Returns:
- - pd.DataFrame: A DataFrame containing the query result.
+ - **Returns:**
+ - `pd.DataFrame`: A DataFrame containing the query result.

- Example:
-
- from rgwfuncs import load_data_from_query
+ - **Notes:**
+ - The configuration file is assumed to be located at `~/.rgwfuncsrc`.

- df = load_data_from_query(
- db_preset_name="MyDBPreset",
- query="SELECT * FROM my_table",
- config_file_name="rgwml.config"
- )
- print(df)
-
+ - **Example:**
+
+ from rgwfuncs import load_data_from_query
+
+ df = load_data_from_query(
+ db_preset_name="MyDBPreset",
+ query="SELECT * FROM my_table"
+ )
+ print(df)

+
  --------------------------------------------------------------------------------

  ### 13. `load_data_from_path`
@@ -1148,10 +1150,47 @@ Perform a right join on two DataFrames.
  df_right_join = right_join(df1, df2, 'ID', 'ID')
  print(df_right_join)

+ --------------------------------------------------------------------------------
+
+ ### 45. `insert_dataframe_in_sqlite_database`
+
+ Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
+
+ - **Parameters:**
+ - `db_path` (str): The path to the SQLite database file. If the database does not exist, it will be created.
+ - `tablename` (str): The name of the table in the database. If the table does not exist, it is created with the DataFrame's columns and data types.
+ - `df` (pd.DataFrame): The DataFrame containing the data to be inserted into the database table.
+
+ - **Returns:**
+ - `None`
+
+ - **Notes:**
+ - Data types in the DataFrame are converted to SQLite-compatible types:
+ - `int64` is mapped to `INTEGER`
+ - `float64` is mapped to `REAL`
+ - `object` is mapped to `TEXT`
+ - `datetime64[ns]` is mapped to `TEXT` (dates are stored as text)
+ - `bool` is mapped to `INTEGER` (SQLite does not have a separate Boolean type)
+
+ - **Example:**
+
+ from rgwfuncs import insert_dataframe_in_sqlite_database
+ import pandas as pd
+
+ df = pd.DataFrame({
+ 'ID': [1, 2, 3],
+ 'Name': ['Alice', 'Bob', 'Charlie'],
+ 'Score': [88.5, 92.3, 85.0]
+ })
+
+ db_path = 'my_database.db'
+ tablename = 'students'
+
+ insert_dataframe_in_sqlite_database(db_path, tablename, df)

  --------------------------------------------------------------------------------

- ### 45. `sync_dataframe_to_sqlite_database`
+ ### 46. `sync_dataframe_to_sqlite_database`
  Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.

  • Parameters:
@@ -1,4 +0,0 @@
- # This file is automatically generated
- # Dynamically importing functions from modules
-
- from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
File without changes