rgwfuncs 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rgwfuncs
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: A functional programming paradigm for mathematical modelling and data science
5
5
  Home-page: https://github.com/ryangerardwilson/rgwfunc
6
6
  Author: Ryan Gerard Wilson
@@ -28,16 +28,90 @@ Requires-Dist: google-api-python-client
28
28
 
29
29
  ***By Ryan Gerard Wilson (https://ryangerardwilson.com)***
30
30
 
31
- This library provides a variety of functions for manipulating and analyzing pandas DataFrames.
31
+ This library is meant to make ML/Data Science pipelines more readable. It assumes a Linux environment and the existence of a `rgwml.config` file for certain features (like database querying, sending data to Slack, etc.).
32
32
 
33
33
  --------------------------------------------------------------------------------
34
34
 
35
35
  ## Installation
36
36
 
37
37
  Install the package using:
38
- bash
39
- pip install rgwfuncs
40
38
 
39
+ pip install rgwfuncs
40
+
41
+ --------------------------------------------------------------------------------
42
+
43
+ ## Create a `rgwml.config` File
44
+
45
+ A `rgwml.config` file (located at `~/Documents/rgwml.config`) is required for MSSQL, CLICKHOUSE, MYSQL, GOOGLE BIG QUERY, SLACK, TELEGRAM, and GMAIL integrations.
46
+
47
+ {
48
+ "db_presets" : [
49
+ {
50
+ "name": "mssql_db9",
51
+ "db_type": "mssql",
52
+ "host": "",
53
+ "username": "",
54
+ "password": "",
55
+ "database": ""
56
+ },
57
+ {
58
+ "name": "clickhouse_db7",
59
+ "db_type": "clickhouse",
60
+ "host": "",
61
+ "username": "",
62
+ "password": "",
63
+ "database": ""
64
+ },
65
+ {
66
+ "name": "mysql_db2",
67
+ "db_type": "mysql",
68
+ "host": "",
69
+ "username": "",
70
+ "password": "",
71
+ "database": ""
72
+ },
73
+ {
74
+ "name": "bq_db1",
75
+ "db_type": "google_big_query",
76
+ "json_file_path": "",
77
+ "project_id": ""
78
+ }
79
+ ],
80
+ "vm_presets": [
81
+ {
82
+ "name": "main_server",
83
+ "host": "",
84
+ "ssh_user": "",
85
+ "ssh_key_path": ""
86
+ }
87
+ ],
88
+ "cloud_storage_presets": [
89
+ {
90
+ "name": "gcs_bucket_name",
91
+ "credential_path": "path/to/your/credentials.json"
92
+ }
93
+ ],
94
+ "telegram_bot_presets": [
95
+ {
96
+ "name": "rgwml-bot",
97
+ "chat_id": "",
98
+ "bot_token": ""
99
+ }
100
+ ],
101
+ "slack_bot_presets": [
102
+ {
103
+ "name": "labs-channel",
104
+ "channel_id": "",
105
+ "bot_token": ""
106
+ }
107
+ ],
108
+ "gmail_bot_presets": [
109
+ {
110
+ "name": "info@xyz.com",
111
+ "service_account_credentials_path": "/home/user/Documents/credentials/your_creds.json"
112
+ }
113
+ ]
114
+ }
41
115
 
42
116
  --------------------------------------------------------------------------------
43
117
 
@@ -978,6 +1052,103 @@ Retain only rows with uncommon column values between two DataFrames.
978
1052
  print(df_uncommon)
979
1053
 
980
1054
 
1055
+ --------------------------------------------------------------------------------
1056
+
1057
+ ### 41. `union_join`
1058
+ Perform a union join, concatenating two DataFrames and dropping duplicates.
1059
+
1060
+ • Parameters:
1061
+ - `df1` (pd.DataFrame): First DataFrame.
1062
+ - `df2` (pd.DataFrame): Second DataFrame.
1063
+
1064
+ • Returns:
1065
+ - pd.DataFrame: A new DataFrame with the union of `df1` and `df2`, without duplicates.
1066
+
1067
+ • Example:
1068
+
1069
+ from rgwfuncs import union_join
1070
+ import pandas as pd
1071
+
1072
+ df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1073
+ df2 = pd.DataFrame({'ID': [2, 3, 4], 'Value': [20, 30, 40]})
1074
+
1075
+ df_union = union_join(df1, df2)
1076
+ print(df_union)
1077
+
1078
+ --------------------------------------------------------------------------------
1079
+
1080
+ ### 42. `bag_union_join`
1081
+ Perform a bag union join, concatenating two DataFrames without dropping duplicates.
1082
+
1083
+ • Parameters:
1084
+ - `df1` (pd.DataFrame): First DataFrame.
1085
+ - `df2` (pd.DataFrame): Second DataFrame.
1086
+
1087
+ • Returns:
1088
+ - pd.DataFrame: A new DataFrame with the concatenated data of `df1` and `df2`.
1089
+
1090
+ • Example:
1091
+
1092
+ from rgwfuncs import bag_union_join
1093
+ import pandas as pd
1094
+
1095
+ df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1096
+ df2 = pd.DataFrame({'ID': [2, 3, 4], 'Value': [20, 30, 40]})
1097
+
1098
+ df_bag_union = bag_union_join(df1, df2)
1099
+ print(df_bag_union)
1100
+
1101
+ --------------------------------------------------------------------------------
1102
+
1103
+ ### 43. `left_join`
1104
+ Perform a left join on two DataFrames.
1105
+
1106
+ • Parameters:
1107
+ - `df1` (pd.DataFrame): The left DataFrame.
1108
+ - `df2` (pd.DataFrame): The right DataFrame.
1109
+ - `left_on` (str): Column name in `df1` to join on.
1110
+ - `right_on` (str): Column name in `df2` to join on.
1111
+
1112
+ • Returns:
1113
+ - pd.DataFrame: A new DataFrame as the result of a left join.
1114
+
1115
+ • Example:
1116
+
1117
+ from rgwfuncs import left_join
1118
+ import pandas as pd
1119
+
1120
+ df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1121
+ df2 = pd.DataFrame({'ID': [2, 3, 4], 'Extra': ['A', 'B', 'C']})
1122
+
1123
+ df_left_join = left_join(df1, df2, 'ID', 'ID')
1124
+ print(df_left_join)
1125
+
1126
+ --------------------------------------------------------------------------------
1127
+
1128
+ ### 44. `right_join`
1129
+ Perform a right join on two DataFrames.
1130
+
1131
+ • Parameters:
1132
+ - `df1` (pd.DataFrame): The left DataFrame.
1133
+ - `df2` (pd.DataFrame): The right DataFrame.
1134
+ - `left_on` (str): Column name in `df1` to join on.
1135
+ - `right_on` (str): Column name in `df2` to join on.
1136
+
1137
+ • Returns:
1138
+ - pd.DataFrame: A new DataFrame as the result of a right join.
1139
+
1140
+ • Example:
1141
+
1142
+ from rgwfuncs import right_join
1143
+ import pandas as pd
1144
+
1145
+ df1 = pd.DataFrame({'ID': [1, 2, 3], 'Value': [10, 20, 30]})
1146
+ df2 = pd.DataFrame({'ID': [2, 3, 4], 'Extra': ['A', 'B', 'C']})
1147
+
1148
+ df_right_join = right_join(df1, df2, 'ID', 'ID')
1149
+ print(df_right_join)
1150
+
1151
+
981
1152
  --------------------------------------------------------------------------------
982
1153
 
983
1154
  ## Additional Info
@@ -0,0 +1,8 @@
1
+ rgwfuncs/__init__.py,sha256=o4BBYVERWwAx8dknJ03yVCHqV9o8D1qrRgFeJrtpDWg,1041
2
+ rgwfuncs/df_lib.py,sha256=vYKElOUyMqBMC5EYA6vaqknpmVoOzNIyOWdJxMlzGcs,61137
3
+ rgwfuncs-0.0.7.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
+ rgwfuncs-0.0.7.dist-info/METADATA,sha256=CoUF-aVOBZmywYKpz2fLgd18Y0YRQieXGYp1E6ggMw8,31226
5
+ rgwfuncs-0.0.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ rgwfuncs-0.0.7.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
+ rgwfuncs-0.0.7.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
+ rgwfuncs-0.0.7.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- rgwfuncs/__init__.py,sha256=8suLAGE7rHBY9e2ViUJuRCUyiam4PO7bjNq_l59dW8Q,24
2
- rgwfuncs/df_lib.py,sha256=vkPOg0acDUwEYbyELNZ4OTJ9cHu9MbZaC4quN8XWtQY,63202
3
- rgwfuncs-0.0.5.dist-info/LICENSE,sha256=7EI8xVBu6h_7_JlVw-yPhhOZlpY9hP8wal7kHtqKT_E,1074
4
- rgwfuncs-0.0.5.dist-info/METADATA,sha256=mR9Dp1WY8pFwFXRyrzJ229pMO_3Na80AyYwqA32K4GY,26554
5
- rgwfuncs-0.0.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
- rgwfuncs-0.0.5.dist-info/entry_points.txt,sha256=j-c5IOPIQ0252EaOV6j6STio56sbXl2C4ym_fQ0lXx0,43
7
- rgwfuncs-0.0.5.dist-info/top_level.txt,sha256=aGuVIzWsKiV1f2gCb6mynx0zx5ma0B1EwPGFKVEMTi4,9
8
- rgwfuncs-0.0.5.dist-info/RECORD,,