rgwfuncs 0.0.16__tar.gz → 0.0.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rgwfuncs-0.0.16/src/rgwfuncs.egg-info → rgwfuncs-0.0.17}/PKG-INFO +59 -20
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/README.md +58 -19
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/pyproject.toml +1 -1
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/setup.cfg +1 -1
- rgwfuncs-0.0.17/src/rgwfuncs/__init__.py +4 -0
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/src/rgwfuncs/df_lib.py +124 -82
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17/src/rgwfuncs.egg-info}/PKG-INFO +59 -20
- rgwfuncs-0.0.16/src/rgwfuncs/__init__.py +0 -4
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/LICENSE +0 -0
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/src/rgwfuncs.egg-info/SOURCES.txt +0 -0
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/src/rgwfuncs.egg-info/dependency_links.txt +0 -0
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/src/rgwfuncs.egg-info/entry_points.txt +0 -0
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/src/rgwfuncs.egg-info/requires.txt +0 -0
- {rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/src/rgwfuncs.egg-info/top_level.txt +0 -0
{rgwfuncs-0.0.16/src/rgwfuncs.egg-info → rgwfuncs-0.0.17}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: rgwfuncs
-Version: 0.0.16
+Version: 0.0.17
 Summary: A functional programming paradigm for mathematical modelling and data science
 Home-page: https://github.com/ryangerardwilson/rgwfunc
 Author: Ryan Gerard Wilson
@@ -40,9 +40,9 @@ Install the package using:
 
 --------------------------------------------------------------------------------
 
-## Create a `
+## Create a `.rgwfuncsrc` File
 
-A `
+A `.rgwfuncsrc` file (located at `~/.rgwfuncsrc`) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
 
 {
   "db_presets" : [
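For orientation, a minimal sketch of writing such a `.rgwfuncsrc` file follows. The preset fields shown (`name`, `db_type`, `host`, `username`, `password`) are inferred from the `df_lib.py` changes further down in this diff; other sections (Telegram, Slack, Gmail presets) and the full schema are not reproduced here, so treat this as an assumption-laden example rather than the package's documented template.

    # Sketch: create a minimal ~/.rgwfuncsrc with one database preset.
    import json
    import os

    config = {
        "db_presets": [
            {
                "name": "MyDBPreset",      # referenced later by load_data_from_query
                "db_type": "mssql",
                "host": "db.example.com",
                "username": "reader",
                "password": "secret",
            }
        ]
    }

    with open(os.path.expanduser("~/.rgwfuncsrc"), "w") as f:
        json.dump(config, f, indent=2)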
@@ -381,28 +381,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
 --------------------------------------------------------------------------------
 
 ### 12. `load_data_from_query`
+
 Load data from a database query into a DataFrame based on a configuration preset.
 
-
-- `db_preset_name` (str): Name of the database preset in the
-- query (str): The SQL query to execute.
-- `config_file_name` (str): Name of the configuration file (default: "rgwml.config").
+- **Parameters:**
+  - `db_preset_name` (str): Name of the database preset in the configuration file.
+  - `query` (str): The SQL query to execute.
 
-
-- pd.DataFrame
+- **Returns:**
+  - `pd.DataFrame`: A DataFrame containing the query result.
 
-
-
-    from rgwfuncs import load_data_from_query
+- **Notes:**
+  - The configuration file is assumed to be located at `~/.rgwfuncsrc`.
 
-
-
-
-
-
-
-
+- **Example:**
+
+    from rgwfuncs import load_data_from_query
+
+    df = load_data_from_query(
+        db_preset_name="MyDBPreset",
+        query="SELECT * FROM my_table"
+    )
+    print(df)
 
+
 --------------------------------------------------------------------------------
 
 ### 13. `load_data_from_path`
@@ -1148,10 +1150,47 @@ Perform a right join on two DataFrames.
     df_right_join = right_join(df1, df2, 'ID', 'ID')
     print(df_right_join)
 
+--------------------------------------------------------------------------------
+
+### 45. `insert_dataframe_in_sqlite_database`
+
+Inserts a Pandas DataFrame into a SQLite database table. If the specified table does not exist, it will be created with column types automatically inferred from the DataFrame's data types.
+
+- **Parameters:**
+  - `db_path` (str): The path to the SQLite database file. If the database does not exist, it will be created.
+  - `tablename` (str): The name of the table in the database. If the table does not exist, it is created with the DataFrame's columns and data types.
+  - `df` (pd.DataFrame): The DataFrame containing the data to be inserted into the database table.
+
+- **Returns:**
+  - `None`
+
+- **Notes:**
+  - Data types in the DataFrame are converted to SQLite-compatible types:
+    - `int64` is mapped to `INTEGER`
+    - `float64` is mapped to `REAL`
+    - `object` is mapped to `TEXT`
+    - `datetime64[ns]` is mapped to `TEXT` (dates are stored as text)
+    - `bool` is mapped to `INTEGER` (SQLite does not have a separate Boolean type)
+
+- **Example:**
+
+    from rgwfuncs import insert_dataframe_in_sqlite_database
+    import pandas as pd
+
+    df = pd.DataFrame({
+        'ID': [1, 2, 3],
+        'Name': ['Alice', 'Bob', 'Charlie'],
+        'Score': [88.5, 92.3, 85.0]
+    })
+
+    db_path = 'my_database.db'
+    tablename = 'students'
+
+    insert_dataframe_in_sqlite_database(db_path, tablename, df)
 
 --------------------------------------------------------------------------------
 
-### 
+### 46. `sync_dataframe_to_sqlite_database`
 Processes and saves a DataFrame to an SQLite database, adding a timestamp column and replacing the existing table if needed. Creates the table if it does not exist.
 
 • Parameters:
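As a quick sanity check on the example above (not part of the package docs), the inserted rows can be read back with Python's standard `sqlite3` module; the database, table, and column names simply follow the example.

    import sqlite3

    # Read back the rows written by insert_dataframe_in_sqlite_database above.
    with sqlite3.connect('my_database.db') as conn:
        rows = conn.execute('SELECT "ID", "Name", "Score" FROM "students"').fetchall()
    print(rows)  # expected: [(1, 'Alice', 88.5), (2, 'Bob', 92.3), (3, 'Charlie', 85.0)]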
{rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/README.md

@@ -14,9 +14,9 @@ Install the package using:
@@ -355,28 +355,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
@@ -1122,10 +1124,47 @@ Perform a right join on two DataFrames.

(The README.md hunks are identical in content to the PKG-INFO hunks above; PKG-INFO embeds the README, so only the line offsets differ.)
rgwfuncs-0.0.17/src/rgwfuncs/__init__.py

@@ -0,0 +1,4 @@
+# This file is automatically generated
+# Dynamically importing functions from modules
+
+from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, insert_dataframe_in_sqlite_database, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
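Comparing this list with the 0.0.16 `__init__.py` removed at the end of this diff, the only difference is the addition of `insert_dataframe_in_sqlite_database`, so the new function is importable from the package root. A minimal check, assuming the 0.0.17 package is installed:

    # Both names come from the regenerated import list above.
    from rgwfuncs import insert_dataframe_in_sqlite_database, load_data_from_query

    print(insert_dataframe_in_sqlite_database.__name__)  # insert_dataframe_in_sqlite_database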
{rgwfuncs-0.0.16 → rgwfuncs-0.0.17}/src/rgwfuncs/df_lib.py

@@ -28,6 +28,7 @@ import warnings
 # Suppress all FutureWarnings
 warnings.filterwarnings("ignore", category=FutureWarning)
 
+
 def docs(method_type_filter: Optional[str] = None) -> None:
     """
     Print a list of function names in alphabetical order. If method_type_filter
@@ -339,16 +340,13 @@ def drop_duplicates_retain_last(df: pd.DataFrame, columns: Optional[str] = None)
     return df.drop_duplicates(subset=columns_list, keep='last')
 
 
-def load_data_from_query(db_preset_name: str, query: str
+def load_data_from_query(db_preset_name: str, query: str) -> pd.DataFrame:
     """
-    Load data from a database query into a DataFrame based on a configuration
-    preset.
+    Load data from a database query into a DataFrame based on a configuration preset.
 
     Parameters:
         db_preset_name: The name of the database preset in the configuration file.
         query: The SQL query to execute.
-        config_file_name: Name of the configuration file
-            (default: 'rgwml.config').
 
     Returns:
         A DataFrame containing the query result.
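In short, the `config_file_name` parameter is gone and the function now takes only the preset name and the query. A call under the new signature (the preset name and query below are illustrative):

    from rgwfuncs import load_data_from_query

    # 0.0.17: the config file is no longer passed in; it is read from ~/.rgwfuncsrc.
    df = load_data_from_query(
        db_preset_name="MyDBPreset",
        query="SELECT * FROM my_table"
    )
    print(df.head())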
@@ -358,17 +356,6 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
         ValueError: If the database preset or db_type is invalid.
     """
 
-    def locate_config_file(filename: str = config_file_name) -> str:
-        home_dir = os.path.expanduser("~")
-        search_paths = [os.path.join(home_dir, "Desktop"), os.path.join(home_dir, "Documents"), os.path.join(home_dir, "Downloads"),]
-
-        for path in search_paths:
-            for root, dirs, files in os.walk(path):
-                if filename in files:
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(
-            f"{filename} not found in Desktop, Documents, or Downloads folders")
-
     def query_mssql(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
         server = db_preset['host']
         user = db_preset['username']
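A practical consequence of dropping `locate_config_file`: a config that previously lived anywhere under Desktop, Documents, or Downloads is no longer found automatically. A hedged migration sketch follows; the old file name `rgwml.config` and its search locations are taken from the removed code and old docstring, and the old path below is only an example of where your copy might live.

    import os
    import shutil

    old_path = os.path.expanduser("~/Documents/rgwml.config")  # wherever the old file actually was
    new_path = os.path.expanduser("~/.rgwfuncsrc")             # the only location 0.0.17 reads

    # Copy the old config into place once, without overwriting an existing ~/.rgwfuncsrc.
    if os.path.exists(old_path) and not os.path.exists(new_path):
        shutil.copy(old_path, new_path)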
@@ -398,7 +385,6 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
         return pd.DataFrame(rows, columns=columns)
 
     def query_clickhouse(db_preset: Dict[str, Any], query: str) -> pd.DataFrame:
-
         host = db_preset['host']
         user = db_preset['username']
         password = db_preset['password']
@@ -437,8 +423,8 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
 
         return pd.DataFrame(rows, columns=columns)
 
-    # 
-    config_path = 
+    # Assume the configuration file is located at ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     with open(config_path, 'r') as f:
         config = json.load(f)
 
@@ -461,6 +447,7 @@ def load_data_from_query(db_preset_name: str, query: str, config_file_name: str
     raise ValueError(f"Unsupported db_type: {db_type}")
 
 
+
 def load_data_from_path(file_path: str) -> pd.DataFrame:
     """
     Load data from a file into a DataFrame based on the file extension.
@@ -811,39 +798,36 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option
 
     Parameters:
         df: The DataFrame to send.
-        bot_name: The name of the Telegram bot as specified in the configuration.
-        message: Custom message to send along with the DataFrame or file.
-        as_file: Boolean flag to 
-        remove_after_send: If True, removes the file after sending.
-    """
+        bot_name: The name of the Telegram bot as specified in the configuration file.
+        message: Custom message to send along with the DataFrame or file. Defaults to None.
+        as_file: Boolean flag to indicate whether the DataFrame should be sent as a file (True) or as text (False). Defaults to True.
+        remove_after_send: If True, removes the CSV file after sending. Defaults to True.
 
-
-
-
-        search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+    Raises:
+        ValueError: If the specified bot is not found or if no DataFrame is provided.
+        Exception: If the message sending fails.
 
-
-
-
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(
-            f"{filename} not found in Desktop, Documents, or Downloads")
+    Notes:
+        The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+    """
 
     def get_config(config_path: str) -> dict:
-        """Load configuration from a 
+        """Load configuration from a JSON file."""
         with open(config_path, 'r') as file:
             return json.load(file)
 
-
+    # Assume the configuration file is located at ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     config = get_config(config_path)
-    bot_config = next((bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name), None)
 
+    bot_config = next((bot for bot in config['telegram_bot_presets'] if bot['name'] == bot_name), None)
     if not bot_config:
         raise ValueError(f"No bot found with the name {bot_name}")
 
     if df is None:
         raise ValueError("No DataFrame to send. Please provide a DataFrame.")
 
+    response = None
     if as_file:
         timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
         file_name = f"df_{timestamp}.csv"
@@ -862,11 +846,12 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option
         df_str = df.to_string()
         payload = {
             'chat_id': bot_config['chat_id'],
-            'text': message + "\n\n" + df_str if message else df_str,
-            'parse_mode': 'HTML'
+            'text': (message + "\n\n" + df_str) if message else df_str,
+            'parse_mode': 'HTML'
+        }
         response = requests.post(f"https://api.telegram.org/bot{bot_config['bot_token']}/sendMessage", data=payload)
 
-    if not response.ok:
+    if response and not response.ok:
         raise Exception(f"Error sending message: {response.text}")
 
     print("Message sent successfully.")
@@ -874,28 +859,24 @@ def send_dataframe_via_telegram(df: pd.DataFrame, bot_name: str, message: Option
 
 def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subject: Optional[str] = None, body: Optional[str] = None, as_file: bool = True, remove_after_send: bool = True) -> None:
     """
-    Send an email with optional DataFrame attachment using Gmail API via a specified preset.
+    Send an email with an optional DataFrame attachment using the Gmail API via a specified preset.
 
     Parameters:
         df: The DataFrame to send.
         preset_name: The configuration preset name to use for sending the email.
         to_email: The recipient email address.
-        subject: Optional subject of the email.
-        body: Optional message body of the email.
-        as_file: Boolean flag to decide whether to send the DataFrame as a file.
-        remove_after_send: If True, removes the CSV file after sending.
-    """
+        subject: Optional subject of the email. Defaults to 'DataFrame CSV File' if not given.
+        body: Optional message body of the email. Defaults to 'Please find the CSV file attached.' if not given.
+        as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or embed it in the email (False). Defaults to True.
+        remove_after_send: If True, removes the CSV file after sending. Defaults to True.
 
-
-
-
-        search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+    Raises:
+        ValueError: If the preset is not found in the configuration.
+        Exception: If the email preparation or sending fails.
 
-
-
-
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(f"{filename} not found in Desktop, Documents, or Downloads folders")
+    Notes:
+        The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+    """
 
     def get_config(config_path: str) -> dict:
         with open(config_path, 'r') as file:
@@ -904,9 +885,7 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
         except json.JSONDecodeError as e:
             raise ValueError(f"Invalid JSON format in config file: {e}")
 
-    def authenticate_service_account(
-            service_account_credentials_path: str,
-            sender_email_id: str) -> Any:
+    def authenticate_service_account(service_account_credentials_path: str, sender_email_id: str) -> Any:
         credentials = service_account.Credentials.from_service_account_file(
             service_account_credentials_path,
             scopes=['https://mail.google.com/'],
@@ -914,8 +893,8 @@ def send_data_to_email(df: pd.DataFrame, preset_name: str, to_email: str, subjec
         )
         return build('gmail', 'v1', credentials=credentials)
 
-    # Load configuration
-    config_path = 
+    # Load configuration from ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     config = get_config(config_path)
 
     # Retrieve Gmail preset configuration
@@ -983,30 +962,25 @@ def send_data_to_slack(df: pd.DataFrame, bot_name: str, message: Optional[str] =
     Parameters:
         df: The DataFrame to send.
         bot_name: The Slack bot configuration preset name.
-        message: Custom message to send along with the DataFrame or file.
-        as_file: Boolean flag to decide whether to send the DataFrame as a file.
-        remove_after_send: If True, removes the CSV file after sending.
-    """
+        message: Custom message to send along with the DataFrame or file. Defaults to None.
+        as_file: Boolean flag to decide whether to send the DataFrame as a file (True) or as text (False). Defaults to True.
+        remove_after_send: If True, removes the CSV file after sending. Defaults to True.
 
-
-
-
-        search_paths = [os.path.join(home_dir, folder) for folder in ["Desktop", "Documents", "Downloads"]]
+    Raises:
+        ValueError: If the specified bot is not found in the configuration.
+        Exception: If the message sending fails.
 
-
-
-
-                    return os.path.join(root, filename)
-        raise FileNotFoundError(
-            f"{filename} not found in Desktop, Documents, or Downloads folders")
+    Notes:
+        The configuration file is assumed to be located at `~/.rgwfuncsrc`.
+    """
 
     def get_config(config_path: str) -> dict:
         """Load configuration from a JSON file."""
         with open(config_path, 'r') as file:
             return json.load(file)
 
-    # Load the Slack configuration
-    config_path = 
+    # Load the Slack configuration from ~/.rgwfuncsrc
+    config_path = os.path.expanduser('~/.rgwfuncsrc')
     config = get_config(config_path)
 
     bot_config = next((bot for bot in config['slack_bot_presets'] if bot['name'] == bot_name), None)
@@ -1024,13 +998,22 @@ def send_data_to_slack(df: pd.DataFrame, bot_name: str, message: Optional[str] =
 
         try:
             with open(file_name, 'rb') as file:
-                response = client.files_upload(
+                response = client.files_upload(
+                    channels=bot_config['channel_id'],
+                    file=file,
+                    filename=os.path.basename(file_name),
+                    title="DataFrame Upload",
+                    initial_comment=message or ''
+                )
         finally:
             if remove_after_send and os.path.exists(file_name):
                 os.remove(file_name)
     else:
         df_str = df.to_string()
-        response = client.chat_postMessage(
+        response = client.chat_postMessage(
+            channel=bot_config['channel_id'],
+            text=(message + "\n\n" + df_str) if message else df_str
+        )
 
     # Check if the message was sent successfully
     if not response["ok"]:
@@ -1692,6 +1675,65 @@ def right_join(df1: pd.DataFrame, df2: pd.DataFrame, left_on: str, right_on: str
     return df1.merge(df2, how='right', left_on=left_on, right_on=right_on)
 
 
+def insert_dataframe_in_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
+    """
+    Inserts a Pandas DataFrame into a SQLite database table.
+
+    Parameters:
+        db_path: str
+            The file path to the SQLite database. If the database does not exist,
+            it will be created.
+
+        tablename: str
+            The name of the table where the data will be inserted. If the table does
+            not exist, it will be created based on the DataFrame's columns and types.
+
+        df: pd.DataFrame
+            The DataFrame containing the data to be inserted into the database.
+
+    Functionality:
+        - Checks if the specified table exists in the database.
+        - Creates the table with appropriate column types if it doesn't exist.
+        - Inserts the DataFrame's data into the table, appending to any existing data.
+
+    Data Type Mapping:
+        - Converts Pandas data types to SQLite types: 'int64' to 'INTEGER',
+          'float64' to 'REAL', 'object' to 'TEXT', 'datetime64[ns]' to 'TEXT',
+          and 'bool' to 'INTEGER'.
+
+    Returns:
+        None
+    """
+
+    def table_exists(cursor, table_name):
+        cursor.execute(f"SELECT count(name) FROM sqlite_master WHERE type='table' AND name='{table_name}'")
+        return cursor.fetchone()[0] == 1
+
+
+    dtype_mapping = {
+        'int64': 'INTEGER',
+        'float64': 'REAL',
+        'object': 'TEXT',
+        'datetime64[ns]': 'TEXT',
+        'bool': 'INTEGER',
+    }
+
+    def map_dtype(dtype):
+        return dtype_mapping.get(str(dtype), 'TEXT')
+
+    with sqlite3.connect(db_path) as conn:
+        cursor = conn.cursor()
+
+        if not table_exists(cursor, tablename):
+            columns_with_types = ', '.join(
+                f'"{col}" {map_dtype(dtype)}' for col, dtype in zip(df.columns, df.dtypes)
+            )
+            create_table_query = f'CREATE TABLE "{tablename}" ({columns_with_types})'
+            conn.execute(create_table_query)
+
+        df.to_sql(tablename, conn, if_exists='append', index=False)
+
+
 def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataFrame) -> None:
     """
     Processes and saves a DataFrame to an SQLite database, adding a timestamp column
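One behavioural detail worth noting about the new function: the table is created only on the first call, and `if_exists='append'` means repeated calls add rows rather than replacing them. A small illustration (the file and table names below are arbitrary):

    import sqlite3

    import pandas as pd
    from rgwfuncs import insert_dataframe_in_sqlite_database

    df = pd.DataFrame({'ID': [1], 'Name': ['Alice']})

    # First call creates the table; the second call appends to it.
    insert_dataframe_in_sqlite_database('demo.db', 'people', df)
    insert_dataframe_in_sqlite_database('demo.db', 'people', df)

    with sqlite3.connect('demo.db') as conn:
        print(conn.execute('SELECT COUNT(*) FROM "people"').fetchone()[0])  # 2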
@@ -1702,6 +1744,10 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
         - tablename (str): The name of the table in the database.
         - df (pd.DataFrame): The DataFrame to be processed and saved.
     """
+    # Helper function to map pandas dtype to SQLite type
+    def map_dtype(dtype):
+        return dtype_mapping.get(str(dtype), 'TEXT')
+
     # Step 1: Add a timestamp column to the dataframe
     df['rgwfuncs_sync_timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 
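Taken together with the next hunk, this moves `map_dtype` above the `dtype_mapping` dict it reads. That is still valid Python, because the closure only looks the name up when it is called, by which point the enclosing function has bound `dtype_mapping`. A self-contained illustration of the same pattern:

    def outer() -> str:
        # Defined before `mapping` exists; the lookup happens only at call time.
        def map_dtype(dtype) -> str:
            return mapping.get(str(dtype), 'TEXT')

        mapping = {'int64': 'INTEGER'}  # bound later in the enclosing scope
        return map_dtype('int64')

    assert outer() == 'INTEGER'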
@@ -1714,10 +1760,6 @@ def sync_dataframe_to_sqlite_database(db_path: str, tablename: str, df: pd.DataF
         'bool': 'INTEGER',  # SQLite does not have a separate Boolean storage class
     }
 
-    # Helper function to map pandas dtype to SQLite type
-    def map_dtype(dtype):
-        return dtype_mapping.get(str(dtype), 'TEXT')
-
     # Step 2: Save df in SQLite3 db as '{tablename}_new'
     with sqlite3.connect(db_path) as conn:
         new_table_name = f"{tablename}_new"
{rgwfuncs-0.0.16 → rgwfuncs-0.0.17/src/rgwfuncs.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
@@ -40,9 +40,9 @@ Install the package using:
@@ -381,28 +381,30 @@ Drop duplicate rows based on specified columns, retaining the last occurrence.
@@ -1148,10 +1150,47 @@ Perform a right join on two DataFrames.

(Identical to the PKG-INFO hunks at the top of this diff; the same metadata file is duplicated into the egg-info directory at build time.)
rgwfuncs-0.0.16/src/rgwfuncs/__init__.py

@@ -1,4 +0,0 @@
-# This file is automatically generated
-# Dynamically importing functions from modules
-
-from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, sync_dataframe_to_sqlite_database, top_n_unique_values, union_join, update_rows
Files without changes: LICENSE, src/rgwfuncs.egg-info/SOURCES.txt, src/rgwfuncs.egg-info/dependency_links.txt, src/rgwfuncs.egg-info/entry_points.txt, src/rgwfuncs.egg-info/requires.txt, src/rgwfuncs.egg-info/top_level.txt