rgwfuncs 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rgwfuncs/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is automatically generated
2
2
  # Dynamically importing functions from modules
3
3
 
4
- from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, top_n_unique_values, union_join, update_rows
4
+ from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
rgwfuncs/df_lib.py CHANGED
@@ -27,10 +27,13 @@ from typing import Optional, Callable, Dict, List, Tuple, Any
27
27
 
28
28
  def docs(method_type_filter: Optional[str] = None) -> None:
29
29
  """
30
- Print a list of function names in alphabetical order. If method_type_filter is specified, print the docstrings of the functions that match the filter. Using '*' as a filter will print the docstrings for all functions.
30
+ Print a list of function names in alphabetical order. If method_type_filter
31
+ is specified, print the docstrings of the functions that match the filter.
32
+ Using '*' as a filter will print the docstrings for all functions.
31
33
 
32
34
  Parameters:
33
- method_type_filter: Optional filter string, comma-separated to select docstring types, or '*' for all.
35
+ method_type_filter: Optional filter string representing a function name,
36
+ or '*' to display docstrings for all functions.
34
37
  """
35
38
  # Get the current module's namespace
36
39
  current_module = __name__
@@ -41,7 +44,7 @@ def docs(method_type_filter: Optional[str] = None) -> None:
41
44
  }
42
45
 
43
46
  # List of function names sorted alphabetically
44
- function_names: List[str] = sorted(local_functions.keys())
47
+ function_names = sorted(local_functions.keys())
45
48
 
46
49
  # Print function names
47
50
  print("Functions in alphabetical order:")
@@ -50,26 +53,13 @@ def docs(method_type_filter: Optional[str] = None) -> None:
50
53
 
51
54
  # If a filter is provided or '*', print the docstrings of functions
52
55
  if method_type_filter:
53
- print("\nFiltered function documentation:")
56
+ # print("\nFiltered function documentation:")
54
57
  for name, func in local_functions.items():
55
58
  docstring: Optional[str] = func.__doc__
56
59
  if docstring:
57
- if method_type_filter == '*':
58
- # Print the entire docstring for each function
60
+ if method_type_filter == '*' or method_type_filter == name:
61
+ # Print the entire docstring for the matching function
59
62
  print(f"\n{name}:\n{docstring}")
60
- else:
61
- # Extract only the first line of the docstring
62
- first_line: str = docstring.split('\n')[0]
63
- if "::" in first_line:
64
- # Find the first occurrence of "::" and split there
65
- split_index: int = first_line.find("::")
66
- function_type: str = first_line[:split_index].strip()
67
- function_type_list: List[str] = [
68
- mt.strip() for mt in method_type_filter.split(',')]
69
- if function_type in function_type_list:
70
- # Print the entire docstring if the filter matches
71
- print(f"\n{name}:\n{docstring}")
72
-
73
63
 
74
64
  def numeric_clean(df: pd.DataFrame, column_names: str, column_type: str, irregular_value_treatment: str) -> pd.DataFrame:
75
65
  """
@@ -1696,3 +1686,60 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
1696
1686
  A new DataFrame as the result of a right join.
1697
1687
  """
1698
1688
  return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
1689
+
1690
+ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
1691
+ """
1692
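+ Saves a DataFrame to a table in an SQLite database. A 'rgwfuncs_sync_timestamp' column (current time, 'YYYY-MM-DD HH:MM:SS') is added to the passed DataFrame itself,
1693
+ and the data is written to '{tablename}_new', which then replaces '{tablename}' (created if it does not exist). An empty DataFrame leaves any existing table untouched.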
1694
+
1695
+ Parameters:
1696
+ - db_path (str): Path to the SQLite database file.
1697
+ - tablename (str): The name of the table in the database.
1698
+ - df (pd.DataFrame): The DataFrame to be processed and saved.
1699
+ """
1700
+ # Step 1: Add a timestamp column to the dataframe
1701
+ df['rgwfuncs_sync_timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
1702
+
1703
+ # Define a simple mapping from pandas dtypes to SQLite types
1704
+ dtype_mapping = {
1705
+ 'int64': 'INTEGER',
1706
+ 'float64': 'REAL',
1707
+ 'object': 'TEXT',
1708
+ 'datetime64[ns]': 'TEXT', # Dates are stored as text in SQLite
1709
+ 'bool': 'INTEGER', # SQLite does not have a separate Boolean storage class
1710
+ }
1711
+
1712
+ # Helper function to map pandas dtype to SQLite type
1713
+ def map_dtype(dtype):
1714
+ return dtype_mapping.get(str(dtype), 'TEXT')
1715
+
1716
+ # Step 2: Save df in SQLite3 db as '{tablename}_new'
1717
+ with sqlite3.connect(db_path) as conn:
1718
+ new_table_name = f"{tablename}_new"
1719
+
1720
+ # Check if the new table already exists, create if not
1721
+ cursor = conn.cursor()
1722
+ cursor.execute(f"PRAGMA table_info({new_table_name})")
1723
+ if cursor.fetchall() == []: # Table does not exist
1724
+ # Create a table using the DataFrame's column names and types
1725
+ columns_with_types = ', '.join(
1726
+ f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes)
1727
+ )
1728
+ create_table_query = f'CREATE TABLE "{new_table_name}" ({columns_with_types})'
1729
+ conn.execute(create_table_query)
1730
+
1731
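+ # Write the data; if_exists='replace' drops and recreates the table, so the schema created above is superseded by the one pandas infers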
1732
+ df.to_sql(new_table_name, conn, if_exists='replace', index=False)
1733
+
1734
+ # Step 3: If '{tablename}_new' is not empty, delete table '{tablename}' (if it exists), and rename '{tablename}_new' to '{tablename}'
1735
+ # Check if the new table is not empty
1736
+ cursor.execute(f"SELECT COUNT(*) FROM {new_table_name}")
1737
+ count = cursor.fetchone()[0]
1738
+
1739
+ if count > 0:
1740
+ # Drop the old table if it exists
1741
+ conn.execute(f"DROP TABLE IF EXISTS {tablename}")
1742
+ # Rename the new table to the old table name
1743
+ conn.execute(f"ALTER TABLE {new_table_name} RENAME TO {tablename}")
1744
+
1745
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -77,7 +77,7 @@ A `rgwml.config` file (located at `vi ~/Documents/rgwml.config) is required for
77
77
  "project_id": ""
78
78
  }
79
79
  ],
80
- "vm_presets": [
80
+ "vm_presets": [
81
81
  {
82
82
  "name": "main_server",
83
83
  "host": "",
@@ -85,34 +85,34 @@ A `rgwml.config` file (located at `vi ~/Documents/rgwml.config) is required for
85
85
  "ssh_key_path": ""
86
86
  }
87
87
  ],
88
- "cloud_storage_presets": [
88
+ "cloud_storage_presets": [
89
89
  {
90
90
  "name": "gcs_bucket_name",
91
- "credential_path": "path/to/your/credentials.json"
91
+ "credential_path": "/path/to/your/credentials.json"
92
92
  }
93
93
  ],
94
- "telegram_bot_presets": [
95
- {
96
- "name": "rgwml-bot",
97
- "chat_id": "",
98
- "bot_token": ""
99
- }
100
- ],
101
- "slack_bot_presets": [
102
- {
103
- "name": "labs-channel",
104
- "channel_id": "",
105
- "bot_token": ""
106
- }
107
- ],
108
- "gmail_bot_presets": [
109
- {
110
- "name": "info@xyz.com",
111
- "service_account_credentials_path": "/home/user/Documents/credentials/your_creds.json"
112
- }
113
- ]
114
- }
115
-
94
+ "telegram_bot_presets": [
95
+ {
96
+ "name": "rgwml-bot",
97
+ "chat_id": "",
98
+ "bot_token": ""
99
+ }
100
+ ],
101
+ "slack_bot_presets": [
102
+ {
103
+ "name": "labs-channel",
104
+ "channel_id": "",
105
+ "bot_token": ""
106
+ }
107
+ ],
108
+ "gmail_bot_presets": [
109
+ {
110
+ "name": "info@xyz.com",
111
+ "service_account_credentials_path": "/path/to/your/credentials.json"
112
+ }
113
+ ]
114
+ }
115
+
116
116
  --------------------------------------------------------------------------------
117
117
 
118
118
  ## Basic Usage
@@ -1151,6 +1151,30 @@ Perform a right join on two DataFrames.
1151
1151
 
1152
1152
  --------------------------------------------------------------------------------
1153
1153
 
1154
+ ### 45. `sync_dataframe_to_sqlite_database`
1155
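+ Saves a DataFrame to a table in an SQLite database, replacing the table if it already exists and creating it if it does not. A `rgwfuncs_sync_timestamp` column holding the time of the sync (`YYYY-MM-DD HH:MM:SS`) is added to the DataFrame before it is written; note that this column is added to the DataFrame you pass in. If the DataFrame is empty, any existing table is left untouched.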
1156
+
1157
+ • Parameters:
1158
+ - `db_path` (str): Path to the SQLite database file.
1159
+ - `tablename` (str): The name of the table in the database.
1160
+ - `df` (pd.DataFrame): The DataFrame to be processed and saved.
1161
+
1162
+ • Returns:
1163
+ - None
1164
+
1165
+ • Example:
1166
+
1167
+ from rgwfuncs import sync_dataframe_to_sqlite_database
1168
+ import pandas as pd
1169
+
1170
+ df = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1171
+ db_path = 'my_database.db'
1172
+ tablename = 'my_table'
1173
+
1174
+ sync_dataframe_to_sqlite_database(db_path, tablename, df)
1175
+
1176
+ --------------------------------------------------------------------------------
1177
+
1154
1178
  ## Additional Info
1155
1179
 
1156
1180
  For more information, refer to each function’s docstring by calling:
@@ -0,0 +1,8 @@
1
+ rgwfuncs/__init__.py,sha256=BP8Nh8ivyCCz8Ga-21JW3NWInJFOElKoIfRuioJRWbA,1076
2
+ rgwfuncs/df_lib.py,sha256=3PYfu_zs8HfL56C9Sb41jzoyaG9Oc7x5MZQYvo1zy6M,62930
3
+ rgwfuncs-0.0.9.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
+ rgwfuncs-0.0.9.dist-info/METADATA,sha256=tJiGspLMJbt78FGyyvf3w10ZbWCn17PJ4070wUsH1ew,32058
5
+ rgwfuncs-0.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ rgwfuncs-0.0.9.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
+ rgwfuncs-0.0.9.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
+ rgwfuncs-0.0.9.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- rgwfuncs/__init__.py,sha256=o4BBYVERWwAx8dknJ03yVCHqV9o8D1qrRgFeJrtpDWg,1041
2
- rgwfuncs/df_lib.py,sha256=vYKElOUyMqBMC5EYA6vaqknpmVoOzNIyOWdJxMlzGcs,61137
3
- rgwfuncs-0.0.7.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
- rgwfuncs-0.0.7.dist-info/METADATA,sha256=CoUF-aVOBZmywYKpz2fLgd18Y0YRQieXGYp1E6ggMw8,31226
5
- rgwfuncs-0.0.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
- rgwfuncs-0.0.7.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
- rgwfuncs-0.0.7.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
- rgwfuncs-0.0.7.dist-info/RECORD,,