rgwfuncs 0.0.5__tar.gz → 0.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rgwfuncs-0.0.5/src/rgwfuncs.egg-info → rgwfuncs-0.0.7}/PKG-INFO +175 -4
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/README.md +174 -3
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/pyproject.toml +1 -1
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/setup.cfg +1 -1
- rgwfuncs-0.0.7/src/rgwfuncs/__init__.py +4 -0
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/src/rgwfuncs/df_lib.py +209 -503
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7/src/rgwfuncs.egg-info}/PKG-INFO +175 -4
- rgwfuncs-0.0.5/src/rgwfuncs/__init__.py +0 -3
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/LICENSE +0 -0
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/src/rgwfuncs.egg-info/SOURCES.txt +0 -0
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/src/rgwfuncs.egg-info/dependency_links.txt +0 -0
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/src/rgwfuncs.egg-info/entry_points.txt +0 -0
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/src/rgwfuncs.egg-info/requires.txt +0 -0
- {rgwfuncs-0.0.5 → rgwfuncs-0.0.7}/src/rgwfuncs.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: rgwfuncs
|
3
|
-
Version: 0.0.5
|
3
|
+
Version: 0.0.7
|
4
4
|
Summary: A functional programming paradigm for mathematical modelling and data science
|
5
5
|
Home-page: https://github.com/ryangerardwilson/rgwfunc
|
6
6
|
Author: Ryan Gerard Wilson
|
@@ -28,16 +28,90 @@ Requires-Dist: google-api-python-client
|
|
28
28
|
|
29
29
|
***By Ryan Gerard Wilson (https://ryangerardwilson.com)***
|
30
30
|
|
31
|
-
This library
|
31
|
+
This library is meant to make ML/Data Science pipelines more readable. It assumes a Linux environment and the existence of a `rgwml.config` file for certain features (such as database querying, sending data to Slack, etc.).
|
32
32
|
|
33
33
|
--------------------------------------------------------------------------------
|
34
34
|
|
35
35
|
## Installation
|
36
36
|
|
37
37
|
Install the package using:
|
38
|
-
bash
|
39
|
-
pip install rgwfuncs
|
40
38
|
|
39
|
+
pip install rgwfuncs
|
40
|
+
|
41
|
+
--------------------------------------------------------------------------------
|
42
|
+
|
43
|
+
## Create a `rgwml.config` File
|
44
|
+
|
45
|
+
A `rgwml.config` file (located at `~/Documents/rgwml.config`; create or edit it with, e.g., `vi ~/Documents/rgwml.config`) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
|
46
|
+
|
47
|
+
{
|
48
|
+
"db_presets" : [
|
49
|
+
{
|
50
|
+
"name": "mssql_db9",
|
51
|
+
"db_type": "mssql",
|
52
|
+
"host": "",
|
53
|
+
"username": "",
|
54
|
+
"password": "",
|
55
|
+
"database": ""
|
56
|
+
},
|
57
|
+
{
|
58
|
+
"name": "clickhouse_db7",
|
59
|
+
"db_type": "clickhouse",
|
60
|
+
"host": "",
|
61
|
+
"username": "",
|
62
|
+
"password": "",
|
63
|
+
"database": ""
|
64
|
+
},
|
65
|
+
{
|
66
|
+
"name": "mysql_db2",
|
67
|
+
"db_type": "mysql",
|
68
|
+
"host": "",
|
69
|
+
"username": "",
|
70
|
+
"password": "",
|
71
|
+
"database": ""
|
72
|
+
},
|
73
|
+
{
|
74
|
+
"name": "bq_db1",
|
75
|
+
"db_type": "google_big_query",
|
76
|
+
"json_file_path": "",
|
77
|
+
"project_id": ""
|
78
|
+
}
|
79
|
+
],
|
80
|
+
"vm_presets": [
|
81
|
+
{
|
82
|
+
"name": "main_server",
|
83
|
+
"host": "",
|
84
|
+
"ssh_user": "",
|
85
|
+
"ssh_key_path": ""
|
86
|
+
}
|
87
|
+
],
|
88
|
+
"cloud_storage_presets": [
|
89
|
+
{
|
90
|
+
"name": "gcs_bucket_name",
|
91
|
+
"credential_path": "path/to/your/credentials.json"
|
92
|
+
}
|
93
|
+
],
|
94
|
+
"telegram_bot_presets": [
|
95
|
+
{
|
96
|
+
"name": "rgwml-bot",
|
97
|
+
"chat_id": "",
|
98
|
+
"bot_token": ""
|
99
|
+
}
|
100
|
+
],
|
101
|
+
"slack_bot_presets": [
|
102
|
+
{
|
103
|
+
"name": "labs-channel",
|
104
|
+
"channel_id": "",
|
105
|
+
"bot_token": ""
|
106
|
+
}
|
107
|
+
],
|
108
|
+
"gmail_bot_presets": [
|
109
|
+
{
|
110
|
+
"name": "info@xyz.com",
|
111
|
+
"service_account_credentials_path": "/home/user/Documents/credentials/your_creds.json"
|
112
|
+
}
|
113
|
+
]
|
114
|
+
}
|
41
115
|
|
42
116
|
--------------------------------------------------------------------------------
|
43
117
|
|
@@ -978,6 +1052,103 @@ Retain only rows with uncommon column values between two DataFrames.
|
|
978
1052
|
print(df_uncommon)
|
979
1053
|
|
980
1054
|
|
1055
|
+
--------------------------------------------------------------------------------
|
1056
|
+
|
1057
|
+
### 41. `union_join`
|
1058
|
+
Perform a union join, concatenating two DataFrames and dropping duplicates.
|
1059
|
+
|
1060
|
+
• Parameters:
|
1061
|
+
- `df1` (pd.DataFrame): First DataFrame.
|
1062
|
+
- `df2` (pd.DataFrame): Second DataFrame.
|
1063
|
+
|
1064
|
+
• Returns:
|
1065
|
+
- pd.DataFrame: A new DataFrame with the union of `df1` and `df2`, without duplicates.
|
1066
|
+
|
1067
|
+
• Example:
|
1068
|
+
|
1069
|
+
from rgwfuncs import union_join
|
1070
|
+
import pandas as pd
|
1071
|
+
|
1072
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1073
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Value': [20, 30, 40]})
|
1074
|
+
|
1075
|
+
df_union = union_join(df1, df2)
|
1076
|
+
print(df_union)
|
1077
|
+
|
1078
|
+
--------------------------------------------------------------------------------
|
1079
|
+
|
1080
|
+
### 42. `bag_union_join`
|
1081
|
+
Perform a bag union join, concatenating two DataFrames without dropping duplicates.
|
1082
|
+
|
1083
|
+
• Parameters:
|
1084
|
+
- `df1` (pd.DataFrame): First DataFrame.
|
1085
|
+
- `df2` (pd.DataFrame): Second DataFrame.
|
1086
|
+
|
1087
|
+
• Returns:
|
1088
|
+
- pd.DataFrame: A new DataFrame with the concatenated data of `df1` and `df2`.
|
1089
|
+
|
1090
|
+
• Example:
|
1091
|
+
|
1092
|
+
from rgwfuncs import bag_union_join
|
1093
|
+
import pandas as pd
|
1094
|
+
|
1095
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1096
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Value': [20, 30, 40]})
|
1097
|
+
|
1098
|
+
df_bag_union = bag_union_join(df1, df2)
|
1099
|
+
print(df_bag_union)
|
1100
|
+
|
1101
|
+
--------------------------------------------------------------------------------
|
1102
|
+
|
1103
|
+
### 43. `left_join`
|
1104
|
+
Perform a left join on two DataFrames.
|
1105
|
+
|
1106
|
+
• Parameters:
|
1107
|
+
- `df1` (pd.DataFrame): The left DataFrame.
|
1108
|
+
- `df2` (pd.DataFrame): The right DataFrame.
|
1109
|
+
- `left_on` (str): Column name in `df1` to join on.
|
1110
|
+
- `right_on` (str): Column name in `df2` to join on.
|
1111
|
+
|
1112
|
+
• Returns:
|
1113
|
+
- pd.DataFrame: A new DataFrame as the result of a left join.
|
1114
|
+
|
1115
|
+
• Example:
|
1116
|
+
|
1117
|
+
from rgwfuncs import left_join
|
1118
|
+
import pandas as pd
|
1119
|
+
|
1120
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1121
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Extra': ['A', 'B', 'C']})
|
1122
|
+
|
1123
|
+
df_left_join = left_join(df1, df2, 'ID', 'ID')
|
1124
|
+
print(df_left_join)
|
1125
|
+
|
1126
|
+
--------------------------------------------------------------------------------
|
1127
|
+
|
1128
|
+
### 44. `right_join`
|
1129
|
+
Perform a right join on two DataFrames.
|
1130
|
+
|
1131
|
+
• Parameters:
|
1132
|
+
- `df1` (pd.DataFrame): The left DataFrame.
|
1133
|
+
- `df2` (pd.DataFrame): The right DataFrame.
|
1134
|
+
- `left_on` (str): Column name in `df1` to join on.
|
1135
|
+
- `right_on` (str): Column name in `df2` to join on.
|
1136
|
+
|
1137
|
+
• Returns:
|
1138
|
+
- pd.DataFrame: A new DataFrame as the result of a right join.
|
1139
|
+
|
1140
|
+
• Example:
|
1141
|
+
|
1142
|
+
from rgwfuncs import right_join
|
1143
|
+
import pandas as pd
|
1144
|
+
|
1145
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1146
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Extra': ['A', 'B', 'C']})
|
1147
|
+
|
1148
|
+
df_right_join = right_join(df1, df2, 'ID', 'ID')
|
1149
|
+
print(df_right_join)
|
1150
|
+
|
1151
|
+
|
981
1152
|
--------------------------------------------------------------------------------
|
982
1153
|
|
983
1154
|
## Additional Info
|
@@ -2,16 +2,90 @@
|
|
2
2
|
|
3
3
|
***By Ryan Gerard Wilson (https://ryangerardwilson.com)***
|
4
4
|
|
5
|
-
This library
|
5
|
+
This library is meant to make ML/Data Science pipelines more readable. It assumes a Linux environment and the existence of a `rgwml.config` file for certain features (such as database querying, sending data to Slack, etc.).
|
6
6
|
|
7
7
|
--------------------------------------------------------------------------------
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
11
|
Install the package using:
|
12
|
-
bash
|
13
|
-
pip install rgwfuncs
|
14
12
|
|
13
|
+
pip install rgwfuncs
|
14
|
+
|
15
|
+
--------------------------------------------------------------------------------
|
16
|
+
|
17
|
+
## Create a `rgwml.config` File
|
18
|
+
|
19
|
+
A `rgwml.config` file (located at `~/Documents/rgwml.config`; create or edit it with, e.g., `vi ~/Documents/rgwml.config`) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
|
20
|
+
|
21
|
+
{
|
22
|
+
"db_presets" : [
|
23
|
+
{
|
24
|
+
"name": "mssql_db9",
|
25
|
+
"db_type": "mssql",
|
26
|
+
"host": "",
|
27
|
+
"username": "",
|
28
|
+
"password": "",
|
29
|
+
"database": ""
|
30
|
+
},
|
31
|
+
{
|
32
|
+
"name": "clickhouse_db7",
|
33
|
+
"db_type": "clickhouse",
|
34
|
+
"host": "",
|
35
|
+
"username": "",
|
36
|
+
"password": "",
|
37
|
+
"database": ""
|
38
|
+
},
|
39
|
+
{
|
40
|
+
"name": "mysql_db2",
|
41
|
+
"db_type": "mysql",
|
42
|
+
"host": "",
|
43
|
+
"username": "",
|
44
|
+
"password": "",
|
45
|
+
"database": ""
|
46
|
+
},
|
47
|
+
{
|
48
|
+
"name": "bq_db1",
|
49
|
+
"db_type": "google_big_query",
|
50
|
+
"json_file_path": "",
|
51
|
+
"project_id": ""
|
52
|
+
}
|
53
|
+
],
|
54
|
+
"vm_presets": [
|
55
|
+
{
|
56
|
+
"name": "main_server",
|
57
|
+
"host": "",
|
58
|
+
"ssh_user": "",
|
59
|
+
"ssh_key_path": ""
|
60
|
+
}
|
61
|
+
],
|
62
|
+
"cloud_storage_presets": [
|
63
|
+
{
|
64
|
+
"name": "gcs_bucket_name",
|
65
|
+
"credential_path": "path/to/your/credentials.json"
|
66
|
+
}
|
67
|
+
],
|
68
|
+
"telegram_bot_presets": [
|
69
|
+
{
|
70
|
+
"name": "rgwml-bot",
|
71
|
+
"chat_id": "",
|
72
|
+
"bot_token": ""
|
73
|
+
}
|
74
|
+
],
|
75
|
+
"slack_bot_presets": [
|
76
|
+
{
|
77
|
+
"name": "labs-channel",
|
78
|
+
"channel_id": "",
|
79
|
+
"bot_token": ""
|
80
|
+
}
|
81
|
+
],
|
82
|
+
"gmail_bot_presets": [
|
83
|
+
{
|
84
|
+
"name": "info@xyz.com",
|
85
|
+
"service_account_credentials_path": "/home/user/Documents/credentials/your_creds.json"
|
86
|
+
}
|
87
|
+
]
|
88
|
+
}
|
15
89
|
|
16
90
|
--------------------------------------------------------------------------------
|
17
91
|
|
@@ -952,6 +1026,103 @@ Retain only rows with uncommon column values between two DataFrames.
|
|
952
1026
|
print(df_uncommon)
|
953
1027
|
|
954
1028
|
|
1029
|
+
--------------------------------------------------------------------------------
|
1030
|
+
|
1031
|
+
### 41. `union_join`
|
1032
|
+
Perform a union join, concatenating two DataFrames and dropping duplicates.
|
1033
|
+
|
1034
|
+
• Parameters:
|
1035
|
+
- `df1` (pd.DataFrame): First DataFrame.
|
1036
|
+
- `df2` (pd.DataFrame): Second DataFrame.
|
1037
|
+
|
1038
|
+
• Returns:
|
1039
|
+
- pd.DataFrame: A new DataFrame with the union of `df1` and `df2`, without duplicates.
|
1040
|
+
|
1041
|
+
• Example:
|
1042
|
+
|
1043
|
+
from rgwfuncs import union_join
|
1044
|
+
import pandas as pd
|
1045
|
+
|
1046
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1047
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Value': [20, 30, 40]})
|
1048
|
+
|
1049
|
+
df_union = union_join(df1, df2)
|
1050
|
+
print(df_union)
|
1051
|
+
|
1052
|
+
--------------------------------------------------------------------------------
|
1053
|
+
|
1054
|
+
### 42. `bag_union_join`
|
1055
|
+
Perform a bag union join, concatenating two DataFrames without dropping duplicates.
|
1056
|
+
|
1057
|
+
• Parameters:
|
1058
|
+
- `df1` (pd.DataFrame): First DataFrame.
|
1059
|
+
- `df2` (pd.DataFrame): Second DataFrame.
|
1060
|
+
|
1061
|
+
• Returns:
|
1062
|
+
- pd.DataFrame: A new DataFrame with the concatenated data of `df1` and `df2`.
|
1063
|
+
|
1064
|
+
• Example:
|
1065
|
+
|
1066
|
+
from rgwfuncs import bag_union_join
|
1067
|
+
import pandas as pd
|
1068
|
+
|
1069
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1070
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Value': [20, 30, 40]})
|
1071
|
+
|
1072
|
+
df_bag_union = bag_union_join(df1, df2)
|
1073
|
+
print(df_bag_union)
|
1074
|
+
|
1075
|
+
--------------------------------------------------------------------------------
|
1076
|
+
|
1077
|
+
### 43. `left_join`
|
1078
|
+
Perform a left join on two DataFrames.
|
1079
|
+
|
1080
|
+
• Parameters:
|
1081
|
+
- `df1` (pd.DataFrame): The left DataFrame.
|
1082
|
+
- `df2` (pd.DataFrame): The right DataFrame.
|
1083
|
+
- `left_on` (str): Column name in `df1` to join on.
|
1084
|
+
- `right_on` (str): Column name in `df2` to join on.
|
1085
|
+
|
1086
|
+
• Returns:
|
1087
|
+
- pd.DataFrame: A new DataFrame as the result of a left join.
|
1088
|
+
|
1089
|
+
• Example:
|
1090
|
+
|
1091
|
+
from rgwfuncs import left_join
|
1092
|
+
import pandas as pd
|
1093
|
+
|
1094
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1095
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Extra': ['A', 'B', 'C']})
|
1096
|
+
|
1097
|
+
df_left_join = left_join(df1, df2, 'ID', 'ID')
|
1098
|
+
print(df_left_join)
|
1099
|
+
|
1100
|
+
--------------------------------------------------------------------------------
|
1101
|
+
|
1102
|
+
### 44. `right_join`
|
1103
|
+
Perform a right join on two DataFrames.
|
1104
|
+
|
1105
|
+
• Parameters:
|
1106
|
+
- `df1` (pd.DataFrame): The left DataFrame.
|
1107
|
+
- `df2` (pd.DataFrame): The right DataFrame.
|
1108
|
+
- `left_on` (str): Column name in `df1` to join on.
|
1109
|
+
- `right_on` (str): Column name in `df2` to join on.
|
1110
|
+
|
1111
|
+
• Returns:
|
1112
|
+
- pd.DataFrame: A new DataFrame as the result of a right join.
|
1113
|
+
|
1114
|
+
• Example:
|
1115
|
+
|
1116
|
+
from rgwfuncs import right_join
|
1117
|
+
import pandas as pd
|
1118
|
+
|
1119
|
+
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
|
1120
|
+
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Extra': ['A', 'B', 'C']})
|
1121
|
+
|
1122
|
+
df_right_join = right_join(df1, df2, 'ID', 'ID')
|
1123
|
+
print(df_right_join)
|
1124
|
+
|
1125
|
+
|
955
1126
|
--------------------------------------------------------------------------------
|
956
1127
|
|
957
1128
|
## Additional Info
|
@@ -0,0 +1,4 @@
|
|
1
|
+
# This file is automatically generated
|
2
|
+
# Dynamically importing functions from modules
|
3
|
+
|
4
|
+
from .df_lib import append_columns, append_percentile_classification_column, append_ranged_classification_column, append_ranged_date_classification_column, append_rows, append_xgb_labels, append_xgb_logistic_regression_predictions, append_xgb_regression_predictions, bag_union_join, bottom_n_unique_values, cascade_sort, delete_rows, docs, drop_duplicates, drop_duplicates_retain_first, drop_duplicates_retain_last, filter_dataframe, filter_indian_mobiles, first_n_rows, from_raw_data, last_n_rows, left_join, limit_dataframe, load_data_from_path, load_data_from_query, load_data_from_sqlite_path, mask_against_dataframe, mask_against_dataframe_converse, numeric_clean, order_columns, print_correlation, print_dataframe, print_memory_usage, print_n_frequency_cascading, print_n_frequency_linear, rename_columns, retain_columns, right_join, send_data_to_email, send_data_to_slack, send_dataframe_via_telegram, top_n_unique_values, union_join, update_rows
|