dataflow-core 2.1.7__tar.gz → 2.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dataflow-core might be problematic. Click here for more details.
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/PKG-INFO +3 -1
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/authenticator/dataflowhubauthenticator.py +19 -16
- dataflow_core-2.1.8/dataflow/dataflow.py +323 -0
- dataflow_core-2.1.8/dataflow/schemas/connection.py +84 -0
- dataflow_core-2.1.8/dataflow/schemas/git_ssh.py +50 -0
- dataflow_core-2.1.8/dataflow/schemas/secret.py +44 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/__init__.py +13 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/factory.py +59 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/interface.py +22 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/providers/__init__.py +0 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/providers/aws_manager.py +164 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/providers/azure_manager.py +185 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/service.py +156 -0
- dataflow_core-2.1.8/dataflow/utils/__init__.py +0 -0
- dataflow_core-2.1.8/dataflow/utils/exceptions.py +112 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow_core.egg-info/PKG-INFO +3 -1
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow_core.egg-info/SOURCES.txt +12 -2
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow_core.egg-info/requires.txt +2 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/setup.py +4 -2
- dataflow_core-2.1.7/dataflow/dataflow.py +0 -199
- dataflow_core-2.1.7/dataflow/utils/aws_secrets_manager.py +0 -57
- dataflow_core-2.1.7/dataflow/utils/json_handler.py +0 -33
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/README.md +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/authenticator/__init__.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/authenticator/dataflowairflowauthenticator.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/authenticator/dataflowsupersetauthenticator.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/__init__.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/configuration.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/database_manager.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/db.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/environment.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/__init__.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/app_types.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/blacklist_library.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/connection.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/dataflow_zone.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/environment.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/environment_status.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/git_ssh.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/pinned_projects.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/project_details.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/recent_project_studio.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/recent_projects.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/role.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/role_server.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/role_zone.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/server_config.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/session.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/team.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/user.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/user_environment.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/user_team.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/models/variables.py +0 -0
- {dataflow_core-2.1.7/dataflow/utils → dataflow_core-2.1.8/dataflow/schemas}/__init__.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/scripts/clone_environment.sh +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/scripts/create_environment.sh +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/scripts/update_environment.sh +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/utils/get_current_user.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow/utils/logger.py +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow_core.egg-info/dependency_links.txt +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow_core.egg-info/entry_points.txt +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/dataflow_core.egg-info/top_level.txt +0 -0
- {dataflow_core-2.1.7 → dataflow_core-2.1.8}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataflow-core
|
|
3
|
-
Version: 2.1.7
|
|
3
|
+
Version: 2.1.8
|
|
4
4
|
Summary: Dataflow core package
|
|
5
5
|
Author: Dataflow
|
|
6
6
|
Author-email:
|
|
@@ -9,6 +9,8 @@ Requires-Dist: boto3
|
|
|
9
9
|
Requires-Dist: psycopg2-binary
|
|
10
10
|
Requires-Dist: pymysql
|
|
11
11
|
Requires-Dist: requests
|
|
12
|
+
Requires-Dist: azure-identity
|
|
13
|
+
Requires-Dist: azure-keyvault-secrets
|
|
12
14
|
Dynamic: author
|
|
13
15
|
Dynamic: requires-dist
|
|
14
16
|
Dynamic: summary
|
|
@@ -109,7 +109,7 @@ class DataflowBaseAuthenticator(Authenticator):
|
|
|
109
109
|
return None
|
|
110
110
|
|
|
111
111
|
username = self.extract_username_from_email(email)
|
|
112
|
-
username = re.sub(r'[^
|
|
112
|
+
username = re.sub(r'[^a-z0-9]', '', username.lower())
|
|
113
113
|
if not username:
|
|
114
114
|
self.log.error("Cannot create user: Username is empty")
|
|
115
115
|
return None
|
|
@@ -247,7 +247,11 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
|
|
|
247
247
|
azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
|
|
248
248
|
azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
|
|
249
249
|
azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
|
|
250
|
-
|
|
250
|
+
dataflow_oauth_type = Unicode(
|
|
251
|
+
default_value="google",
|
|
252
|
+
config=True,
|
|
253
|
+
help="The OAuth provider type for DataflowHub (e.g., github, google)"
|
|
254
|
+
)
|
|
251
255
|
def __init__(self, **kwargs):
|
|
252
256
|
super().__init__(**kwargs)
|
|
253
257
|
self.client_id = self.azure_client_id
|
|
@@ -269,48 +273,47 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
|
|
|
269
273
|
if not user:
|
|
270
274
|
self.log.warning("Azure AD OAuth authentication failed: No user data returned")
|
|
271
275
|
return None
|
|
272
|
-
|
|
273
|
-
|
|
276
|
+
|
|
277
|
+
auth_state = user.get("auth_state", {})
|
|
278
|
+
user_info = auth_state.get("user", {}) if auth_state else {}
|
|
279
|
+
email = user_info.get("upn")
|
|
274
280
|
if not email:
|
|
275
|
-
self.log.warning("Azure AD OAuth authentication failed: No
|
|
281
|
+
self.log.warning("Azure AD OAuth authentication failed: No upn in user data")
|
|
276
282
|
return None
|
|
277
|
-
|
|
283
|
+
|
|
278
284
|
db_user = (
|
|
279
285
|
self.db.query(m_user.User)
|
|
280
286
|
.filter(m_user.User.email == email)
|
|
281
287
|
.first()
|
|
282
288
|
)
|
|
283
|
-
|
|
289
|
+
|
|
284
290
|
if not db_user:
|
|
285
291
|
self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
|
|
286
|
-
# Extract additional info from user data if available
|
|
287
|
-
auth_state = user.get("auth_state", {})
|
|
288
|
-
user_info = auth_state.get("user", {}) if auth_state else {}
|
|
289
292
|
|
|
290
|
-
first_name = user_info.get("
|
|
291
|
-
last_name = user_info.get("family_name") or user.get("family_name")
|
|
293
|
+
first_name = user_info.get("name") or user.get("name")
|
|
292
294
|
|
|
293
|
-
db_user = self.create_new_user(email, first_name, last_name)
|
|
295
|
+
db_user = self.create_new_user(email, first_name, last_name=None)
|
|
294
296
|
if not db_user:
|
|
295
297
|
self.log.error(f"Failed to create new user for email: {email}")
|
|
296
298
|
return None
|
|
297
|
-
|
|
299
|
+
|
|
298
300
|
username = db_user.user_name
|
|
299
301
|
session_id = self.get_or_create_session(db_user.user_id)
|
|
300
302
|
self.set_session_cookie(handler, session_id)
|
|
301
303
|
self.log.info(f"Azure AD OAuth completed for user: {username}, session_id={session_id}")
|
|
302
304
|
return {
|
|
303
|
-
"name":
|
|
305
|
+
"name": db_user.first_name,
|
|
304
306
|
"session_id": session_id,
|
|
305
307
|
"auth_state": user.get("auth_state", {})
|
|
306
308
|
}
|
|
309
|
+
|
|
307
310
|
except Exception as e:
|
|
308
311
|
self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
|
|
309
312
|
return None
|
|
310
313
|
finally:
|
|
311
314
|
self.db.close()
|
|
312
315
|
|
|
313
|
-
auth_type = os.environ.get("
|
|
316
|
+
auth_type = os.environ.get("DATAFLOW_OAUTH_TYPE", "google")
|
|
314
317
|
|
|
315
318
|
if auth_type == "google":
|
|
316
319
|
BaseAuthenticator = DataflowGoogleAuthenticator
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
import os, requests
|
|
2
|
+
from .database_manager import DatabaseManager
|
|
3
|
+
import json
|
|
4
|
+
from .configuration import ConfigurationManager
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Dataflow:
|
|
8
|
+
"""
|
|
9
|
+
Dataflow class to interact with Dataflow services.
|
|
10
|
+
"""
|
|
11
|
+
def auth(self, session_id: str):
|
|
12
|
+
"""
|
|
13
|
+
Retrieve and return user information using their session ID.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
session_id (str): User's session ID from cookies
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
dict: User information including username, name, email, and role
|
|
20
|
+
"""
|
|
21
|
+
try:
|
|
22
|
+
dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
|
|
23
|
+
auth_api = dataflow_config.get_config_value('auth', 'ui_auth_api')
|
|
24
|
+
response = requests.get(
|
|
25
|
+
auth_api,
|
|
26
|
+
cookies={"dataflow_session": session_id, "jupyterhub-hub-login": ""}
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
if response.status_code != 200:
|
|
30
|
+
return response.json()
|
|
31
|
+
|
|
32
|
+
user_data = response.json()
|
|
33
|
+
user_dict = {
|
|
34
|
+
"user_name": user_data["user_name"],
|
|
35
|
+
"first_name": user_data["first_name"],
|
|
36
|
+
"last_name": user_data["last_name"] if user_data.get("last_name") else "",
|
|
37
|
+
"email": user_data["email"],
|
|
38
|
+
"role": user_data["base_role"]
|
|
39
|
+
}
|
|
40
|
+
return user_dict
|
|
41
|
+
|
|
42
|
+
except Exception as e:
|
|
43
|
+
return e
|
|
44
|
+
|
|
45
|
+
def variable(self, variable_name: str):
|
|
46
|
+
"""
|
|
47
|
+
Retrieve a Dataflow variable.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
variable_name (str): Name of the variable to retrieve
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
str or None: Variable value if found, None otherwise
|
|
54
|
+
"""
|
|
55
|
+
try:
|
|
56
|
+
host_name = os.environ.get("HOSTNAME", "")
|
|
57
|
+
runtime = os.environ.get("RUNTIME")
|
|
58
|
+
slug = os.environ.get("SLUG")
|
|
59
|
+
|
|
60
|
+
dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
|
|
61
|
+
|
|
62
|
+
variable_api = None
|
|
63
|
+
if runtime and slug:
|
|
64
|
+
variable_api = dataflow_config.get_config_value("auth", "variable_ui_api")
|
|
65
|
+
elif host_name:
|
|
66
|
+
variable_api = dataflow_config.get_config_value("auth", "variable_manager_api")
|
|
67
|
+
else:
|
|
68
|
+
raise Exception("Cannot run dataflow methods here!")
|
|
69
|
+
|
|
70
|
+
if not variable_api:
|
|
71
|
+
print("[Dataflow.variable] Variable Unreachable")
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
if runtime:
|
|
75
|
+
query_params = {
|
|
76
|
+
"key": variable_name,
|
|
77
|
+
"runtime": runtime,
|
|
78
|
+
"slug": slug
|
|
79
|
+
}
|
|
80
|
+
response = requests.get(variable_api, params=query_params)
|
|
81
|
+
if response.status_code == 200:
|
|
82
|
+
response_text = response.text.strip().strip('"')
|
|
83
|
+
return response_text
|
|
84
|
+
|
|
85
|
+
query_params["slug"] = "global"
|
|
86
|
+
response = requests.get(variable_api, params=query_params)
|
|
87
|
+
if response.status_code == 200:
|
|
88
|
+
response_text = response.text.strip().strip('"')
|
|
89
|
+
return response_text
|
|
90
|
+
else:
|
|
91
|
+
return None
|
|
92
|
+
|
|
93
|
+
query_params = {
|
|
94
|
+
"key": variable_name,
|
|
95
|
+
}
|
|
96
|
+
response = requests.get(variable_api, params=query_params)
|
|
97
|
+
|
|
98
|
+
# Handle different HTTP status codes gracefully
|
|
99
|
+
if response.status_code == 404:
|
|
100
|
+
return None # Variable not found
|
|
101
|
+
elif response.status_code >= 500:
|
|
102
|
+
response.raise_for_status() # Let server errors propagate
|
|
103
|
+
elif response.status_code >= 400:
|
|
104
|
+
print(f"[Dataflow.variable] Client error {response.status_code} for variable '{variable_name}'")
|
|
105
|
+
return None
|
|
106
|
+
elif response.status_code != 200:
|
|
107
|
+
print(f"[Dataflow.variable] Unexpected status {response.status_code} for variable '{variable_name}'")
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
return response.text.strip().strip('"')
|
|
111
|
+
|
|
112
|
+
except requests.exceptions.RequestException as e:
|
|
113
|
+
raise RuntimeError(f"[Dataflow.variable] Failed to fetch variable '{variable_name}'") from e
|
|
114
|
+
|
|
115
|
+
except Exception as e:
|
|
116
|
+
print(f"[Dataflow.variable] Exception occurred: {e}")
|
|
117
|
+
return None
|
|
118
|
+
|
|
119
|
+
def secret(self, secret_name: str):
|
|
120
|
+
"""
|
|
121
|
+
Retrieve a Dataflow secret value.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
secret_name (str): Name of the secret to retrieve
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
str or None: Secret value if found, None otherwise
|
|
128
|
+
"""
|
|
129
|
+
try:
|
|
130
|
+
host_name = os.environ.get("HOSTNAME", "")
|
|
131
|
+
runtime = os.environ.get("RUNTIME")
|
|
132
|
+
slug = os.environ.get("SLUG")
|
|
133
|
+
|
|
134
|
+
dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
|
|
135
|
+
if runtime:
|
|
136
|
+
secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
|
|
137
|
+
else:
|
|
138
|
+
secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")
|
|
139
|
+
if not secret_api:
|
|
140
|
+
print("[Dataflow.secret] Secret API Unreachable")
|
|
141
|
+
return None
|
|
142
|
+
|
|
143
|
+
query_params = {
|
|
144
|
+
"key": secret_name
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
if runtime:
|
|
148
|
+
query_params["runtime"] = runtime
|
|
149
|
+
if slug:
|
|
150
|
+
query_params["slug"] = slug
|
|
151
|
+
|
|
152
|
+
response = requests.get(secret_api, params=query_params)
|
|
153
|
+
|
|
154
|
+
# Handle different HTTP status codes gracefully
|
|
155
|
+
if response.status_code == 404:
|
|
156
|
+
return None # Secret not found
|
|
157
|
+
elif response.status_code >= 500:
|
|
158
|
+
response.raise_for_status() # Let server errors propagate
|
|
159
|
+
elif response.status_code >= 400:
|
|
160
|
+
print(f"[Dataflow.secret] Client error {response.status_code} for secret '{secret_name}'")
|
|
161
|
+
return None
|
|
162
|
+
elif response.status_code != 200:
|
|
163
|
+
print(f"[Dataflow.secret] Unexpected status {response.status_code} for secret '{secret_name}'")
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
return response.text.strip().strip('"')
|
|
167
|
+
|
|
168
|
+
except requests.exceptions.RequestException as e:
|
|
169
|
+
raise RuntimeError(f"[Dataflow.secret] Failed to fetch secret '{secret_name}'") from e
|
|
170
|
+
except Exception as e:
|
|
171
|
+
print(f"[Dataflow.secret] Exception occurred: {e}")
|
|
172
|
+
return None
|
|
173
|
+
|
|
174
|
+
def connection(self, conn_id: str, mode="session"):
|
|
175
|
+
"""
|
|
176
|
+
Connects with a Dataflow connection.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
conn_id (str): Connection identifier
|
|
180
|
+
mode (str): Return type - "session" (default) or "engine" or "url"
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
Session or Engine: SQLAlchemy session or engine based on mode
|
|
184
|
+
"""
|
|
185
|
+
try:
|
|
186
|
+
host_name = os.environ["HOSTNAME"]
|
|
187
|
+
runtime = os.environ.get("RUNTIME")
|
|
188
|
+
slug = os.environ.get("SLUG")
|
|
189
|
+
|
|
190
|
+
dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
|
|
191
|
+
if runtime:
|
|
192
|
+
connection_api = dataflow_config.get_config_value("auth", "connection_ui_api")
|
|
193
|
+
elif host_name:
|
|
194
|
+
connection_api = dataflow_config.get_config_value("auth", "connection_manager_api")
|
|
195
|
+
else:
|
|
196
|
+
raise Exception("Cannot run dataflow methods here! HOSTNAME or RUNTIME env variable not set.")
|
|
197
|
+
|
|
198
|
+
query_params = {
|
|
199
|
+
"conn_id": conn_id
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
if runtime:
|
|
203
|
+
query_params["runtime"] = runtime
|
|
204
|
+
if slug:
|
|
205
|
+
query_params["slug"] = slug
|
|
206
|
+
|
|
207
|
+
response = requests.get(connection_api, params=query_params)
|
|
208
|
+
|
|
209
|
+
# Handle different HTTP status codes gracefully
|
|
210
|
+
if response.status_code == 404:
|
|
211
|
+
raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")
|
|
212
|
+
elif response.status_code >= 500:
|
|
213
|
+
response.raise_for_status() # Let server errors propagate
|
|
214
|
+
elif response.status_code >= 400:
|
|
215
|
+
raise RuntimeError(f"[Dataflow.connection] Client error {response.status_code} for connection '{conn_id}'")
|
|
216
|
+
elif response.status_code != 200:
|
|
217
|
+
raise RuntimeError(f"[Dataflow.connection] Unexpected status {response.status_code} for connection '{conn_id}'")
|
|
218
|
+
|
|
219
|
+
connection_details = response.json()
|
|
220
|
+
|
|
221
|
+
if not connection_details:
|
|
222
|
+
raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")
|
|
223
|
+
|
|
224
|
+
if mode == "dict":
|
|
225
|
+
with open('/home/jovyan/log.txt', 'w') as log_file:
|
|
226
|
+
log_file.write(f"Connection details for {conn_id}: {connection_details}\n")
|
|
227
|
+
print(f"connection_details: {connection_details}")
|
|
228
|
+
return dict(connection_details)
|
|
229
|
+
|
|
230
|
+
conn_type = connection_details['conn_type'].lower()
|
|
231
|
+
username = connection_details['login']
|
|
232
|
+
password = connection_details.get('password', '')
|
|
233
|
+
host = connection_details['host']
|
|
234
|
+
port = connection_details['port']
|
|
235
|
+
database = connection_details.get('schemas', '')
|
|
236
|
+
|
|
237
|
+
user_info = f"{username}:{password}@" if password else f"{username}@"
|
|
238
|
+
db_info = f"/{database}" if database else ""
|
|
239
|
+
|
|
240
|
+
connection_string = f"{conn_type}://{user_info}{host}:{port}{db_info}"
|
|
241
|
+
|
|
242
|
+
extra = connection_details.get('extra', '')
|
|
243
|
+
if extra:
|
|
244
|
+
try:
|
|
245
|
+
extra_params = json.loads(extra)
|
|
246
|
+
if extra_params:
|
|
247
|
+
extra_query = "&".join(f"{key}={value}" for key, value in extra_params.items())
|
|
248
|
+
connection_string += f"?{extra_query}"
|
|
249
|
+
except json.JSONDecodeError:
|
|
250
|
+
# If 'extra' is not valid JSON, skip adding extra parameters
|
|
251
|
+
pass
|
|
252
|
+
|
|
253
|
+
if mode == "url":
|
|
254
|
+
return connection_string
|
|
255
|
+
|
|
256
|
+
connection_instance = DatabaseManager(connection_string)
|
|
257
|
+
if mode == "engine":
|
|
258
|
+
return connection_instance.get_engine()
|
|
259
|
+
elif mode == "session":
|
|
260
|
+
return next(connection_instance.get_session())
|
|
261
|
+
else:
|
|
262
|
+
raise ValueError(f"Unsupported mode: {mode}. Use 'session', 'engine', 'url'.")
|
|
263
|
+
|
|
264
|
+
except requests.exceptions.RequestException as e:
|
|
265
|
+
raise RuntimeError(f"[Dataflow.connection] Failed to fetch connection '{conn_id}'") from e
|
|
266
|
+
|
|
267
|
+
except Exception as e:
|
|
268
|
+
raise RuntimeError(f"[Dataflow.connection] Error connecting to '{conn_id}': {str(e)}") from e
|
|
269
|
+
|
|
270
|
+
def variable_or_secret(self, key: str):
|
|
271
|
+
"""
|
|
272
|
+
Retrieve a variable or secret by key.
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
key (str): Key of the variable or secret
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
str or None: Value if found, None otherwise
|
|
279
|
+
"""
|
|
280
|
+
try:
|
|
281
|
+
host_name = os.environ.get("HOSTNAME", "")
|
|
282
|
+
runtime = os.environ.get("RUNTIME")
|
|
283
|
+
slug = os.environ.get("SLUG")
|
|
284
|
+
|
|
285
|
+
dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
|
|
286
|
+
if runtime and slug:
|
|
287
|
+
variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_ui_api")
|
|
288
|
+
query_params = {
|
|
289
|
+
"key": key,
|
|
290
|
+
"runtime": runtime,
|
|
291
|
+
"slug": slug
|
|
292
|
+
}
|
|
293
|
+
elif host_name:
|
|
294
|
+
variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_manager_api")
|
|
295
|
+
query_params = {
|
|
296
|
+
"key": key
|
|
297
|
+
}
|
|
298
|
+
else:
|
|
299
|
+
raise Exception("Cannot run dataflow methods here!")
|
|
300
|
+
|
|
301
|
+
if not variableorsecret_api:
|
|
302
|
+
print("[Dataflow.variable_or_secret] Variable/Secret Unreachable")
|
|
303
|
+
return None
|
|
304
|
+
|
|
305
|
+
response = requests.get(variableorsecret_api, params=query_params)
|
|
306
|
+
|
|
307
|
+
# Handle different HTTP status codes gracefully
|
|
308
|
+
if response.status_code == 404:
|
|
309
|
+
return None # Variable/secret not found
|
|
310
|
+
elif response.status_code >= 500:
|
|
311
|
+
response.raise_for_status() # Let server errors propagate
|
|
312
|
+
elif response.status_code >= 400:
|
|
313
|
+
print(f"[Dataflow.variable_or_secret] Client error {response.status_code} for key '{key}'")
|
|
314
|
+
return None
|
|
315
|
+
elif response.status_code != 200:
|
|
316
|
+
print(f"[Dataflow.variable_or_secret] Unexpected status {response.status_code} for key '{key}'")
|
|
317
|
+
return None
|
|
318
|
+
|
|
319
|
+
response_text = response.text.strip().strip('"')
|
|
320
|
+
return response_text
|
|
321
|
+
|
|
322
|
+
except requests.exceptions.RequestException as e:
|
|
323
|
+
raise RuntimeError(f"[Dataflow.variable_or_secret] Failed to fetch '{key}'") from e
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""schemas/connection.py"""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, field_validator
|
|
4
|
+
from typing import Optional
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ConnectionType(str, Enum):
|
|
10
|
+
"""Enum for supported connection types."""
|
|
11
|
+
POSTGRESQL = "PostgreSQL"
|
|
12
|
+
MYSQL = "MySQL"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ConnectionBase(BaseModel):
|
|
16
|
+
"""Base connection model with common fields."""
|
|
17
|
+
conn_id: str
|
|
18
|
+
conn_type: ConnectionType
|
|
19
|
+
description: Optional[str] = None
|
|
20
|
+
host: str
|
|
21
|
+
schemas: Optional[str] = None
|
|
22
|
+
password: str
|
|
23
|
+
login: str
|
|
24
|
+
port: int
|
|
25
|
+
extra: Optional[str] = None
|
|
26
|
+
|
|
27
|
+
@field_validator("conn_id")
|
|
28
|
+
def validate_conn_id(cls, v) -> str:
|
|
29
|
+
import re
|
|
30
|
+
if not isinstance(v, str):
|
|
31
|
+
raise ValueError("Connection ID must be a string.")
|
|
32
|
+
if len(v) > 20:
|
|
33
|
+
raise ValueError("Connection ID must be at most 20 characters long.")
|
|
34
|
+
if not re.fullmatch(r"[A-Za-z0-9-]+", v):
|
|
35
|
+
raise ValueError(
|
|
36
|
+
"Connection ID can only contain letters, numbers, and hyphens (-)!"
|
|
37
|
+
)
|
|
38
|
+
return v
|
|
39
|
+
|
|
40
|
+
@field_validator("conn_type")
|
|
41
|
+
def validate_conn_type(cls, v) -> ConnectionType:
|
|
42
|
+
if isinstance(v, str):
|
|
43
|
+
try:
|
|
44
|
+
return ConnectionType(v)
|
|
45
|
+
except ValueError:
|
|
46
|
+
raise ValueError(f'conn_type must be one of {[e.value for e in ConnectionType]}')
|
|
47
|
+
return v
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ConnectionSave(ConnectionBase):
|
|
51
|
+
"""Model for creating a new connection."""
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ConnectionUpdate(BaseModel):
|
|
56
|
+
"""Model for updating an existing connection."""
|
|
57
|
+
conn_type: Optional[ConnectionType] = None
|
|
58
|
+
description: Optional[str] = None
|
|
59
|
+
host: Optional[str] = None
|
|
60
|
+
schemas: Optional[str] = None
|
|
61
|
+
login: Optional[str] = None
|
|
62
|
+
password: Optional[str] = None
|
|
63
|
+
port: Optional[int] = None
|
|
64
|
+
extra: Optional[str] = None
|
|
65
|
+
|
|
66
|
+
@field_validator("conn_type")
|
|
67
|
+
def validate_conn_type(cls, v) -> Optional[ConnectionType]:
|
|
68
|
+
if v is None:
|
|
69
|
+
return v
|
|
70
|
+
if isinstance(v, str):
|
|
71
|
+
# Convert string to enum if needed
|
|
72
|
+
try:
|
|
73
|
+
return ConnectionType(v)
|
|
74
|
+
except ValueError:
|
|
75
|
+
raise ValueError(f'conn_type must be one of {[e.value for e in ConnectionType]}')
|
|
76
|
+
return v
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ConnectionRead(ConnectionBase):
|
|
80
|
+
"""Model for reading/displaying connection data."""
|
|
81
|
+
pass
|
|
82
|
+
|
|
83
|
+
class Config:
|
|
84
|
+
from_attributes = True
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""schemas/git_ssh.py"""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, field_validator
|
|
4
|
+
from typing import Optional, Literal
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SSHBase(BaseModel):
|
|
9
|
+
"""Base SSH key model with common fields."""
|
|
10
|
+
key_name: str
|
|
11
|
+
description: Optional[str] = None
|
|
12
|
+
|
|
13
|
+
@field_validator("key_name")
|
|
14
|
+
def validate_key_name(cls, v) -> str:
|
|
15
|
+
import re
|
|
16
|
+
if not isinstance(v, str):
|
|
17
|
+
raise ValueError("SSH key name must be a string.")
|
|
18
|
+
if len(v) > 20:
|
|
19
|
+
raise ValueError("SSH key name must be at most 20 characters long.")
|
|
20
|
+
if not re.fullmatch(r"[A-Za-z0-9-]+", v):
|
|
21
|
+
raise ValueError(
|
|
22
|
+
"SSH key name can only contain letters, numbers, and hyphens (-)!"
|
|
23
|
+
)
|
|
24
|
+
return v
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SSHSave(SSHBase):
|
|
28
|
+
"""Model for creating a new SSH key."""
|
|
29
|
+
public_key: str
|
|
30
|
+
private_key: str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SSHUpdate(BaseModel):
|
|
34
|
+
"""Model for updating an existing SSH key."""
|
|
35
|
+
description: Optional[str] = None
|
|
36
|
+
public_key: Optional[str] = None
|
|
37
|
+
private_key: Optional[str] = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class SSHRead(SSHBase):
|
|
41
|
+
"""Model for reading/displaying SSH key data."""
|
|
42
|
+
public_key: str
|
|
43
|
+
private_key: str
|
|
44
|
+
created_date: Optional[datetime] = None
|
|
45
|
+
|
|
46
|
+
class Config:
|
|
47
|
+
from_attributes = True
|
|
48
|
+
|
|
49
|
+
class Config:
|
|
50
|
+
from_attributes = True
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""schemas/secret.py"""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, field_validator
|
|
4
|
+
from typing import Optional
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SecretBase(BaseModel):
|
|
9
|
+
"""Base secret model with common fields."""
|
|
10
|
+
key: str
|
|
11
|
+
value: str
|
|
12
|
+
description: Optional[str] = None
|
|
13
|
+
|
|
14
|
+
@field_validator("key")
|
|
15
|
+
def validate_key(cls, v) -> str:
|
|
16
|
+
import re
|
|
17
|
+
if not isinstance(v, str):
|
|
18
|
+
raise ValueError("Secret key must be a string.")
|
|
19
|
+
if len(v) > 20:
|
|
20
|
+
raise ValueError("Secret key must be at most 20 characters long.")
|
|
21
|
+
if not re.fullmatch(r"[A-Za-z0-9-]+", v):
|
|
22
|
+
raise ValueError(
|
|
23
|
+
"Secret key can only contain letters, numbers, and hyphens (-)!"
|
|
24
|
+
)
|
|
25
|
+
return v
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SecretSave(SecretBase):
|
|
29
|
+
"""Model for creating a new secret."""
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SecretUpdate(BaseModel):
|
|
34
|
+
"""Model for updating an existing secret."""
|
|
35
|
+
value: Optional[str] = None
|
|
36
|
+
description: Optional[str] = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SecretRead(SecretBase):
|
|
40
|
+
"""Model for reading/displaying secret data."""
|
|
41
|
+
created_date: Optional[datetime] = None
|
|
42
|
+
|
|
43
|
+
class Config:
|
|
44
|
+
from_attributes = True
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# secrets_manager/__init__.py
|
|
2
|
+
|
|
3
|
+
from .factory import get_secret_manager
|
|
4
|
+
from .service import SecretsService
|
|
5
|
+
|
|
6
|
+
# 1. Call the factory to get the configured low-level secret manager
|
|
7
|
+
# (e.g., an instance of AWSSecretsManager or AzureKeyVault).
|
|
8
|
+
# This happens only once when the package is first imported.
|
|
9
|
+
secret_manager_instance = get_secret_manager()
|
|
10
|
+
|
|
11
|
+
# 2. Create the single, high-level service instance that the rest of
|
|
12
|
+
# your application will use. It wraps the low-level instance.
|
|
13
|
+
secrets_service = SecretsService(secret_manager=secret_manager_instance)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# secrets_manager/factory.py
|
|
2
|
+
import os
|
|
3
|
+
from .interface import SecretManager
|
|
4
|
+
from .providers.aws_manager import AWSSecretsManager
|
|
5
|
+
from .providers.azure_manager import AzureKeyVault
|
|
6
|
+
from ..configuration import ConfigurationManager
|
|
7
|
+
|
|
8
|
+
# A custom exception for clear error messages
|
|
9
|
+
class SecretProviderError(Exception):
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
def get_secret_manager() -> SecretManager:
|
|
13
|
+
"""
|
|
14
|
+
Factory function to get the configured secret manager instance.
|
|
15
|
+
|
|
16
|
+
Reads the cloud provider configuration from dataflow_auth.cfg
|
|
17
|
+
to determine which cloud provider's secret manager to instantiate.
|
|
18
|
+
"""
|
|
19
|
+
try:
|
|
20
|
+
# dataflow_config = None
|
|
21
|
+
# if os.getenv('HOSTNAME'):
|
|
22
|
+
# dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
|
|
23
|
+
# else:
|
|
24
|
+
dataflow_config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
|
|
25
|
+
except Exception as e:
|
|
26
|
+
raise SecretProviderError(
|
|
27
|
+
f"Failed to read cloud provider configuration: {str(e)}. "
|
|
28
|
+
"Please check that the configuration file exists and contains the 'cloud' section."
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
provider = dataflow_config.get_config_value('cloudProvider', 'cloud')
|
|
32
|
+
if not provider:
|
|
33
|
+
raise SecretProviderError(
|
|
34
|
+
"The cloud provider is not configured in config file. "
|
|
35
|
+
"Please set the 'cloud' value in the 'cloud' section to 'aws' or 'azure'."
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
provider = provider.lower()
|
|
39
|
+
print(f"Initializing secret manager for provider: {provider}")
|
|
40
|
+
|
|
41
|
+
if provider == "aws":
|
|
42
|
+
return AWSSecretsManager()
|
|
43
|
+
|
|
44
|
+
elif provider == "azure":
|
|
45
|
+
vault_url = dataflow_config.get_config_value('cloudProvider', 'key_vault')
|
|
46
|
+
if not vault_url:
|
|
47
|
+
raise SecretProviderError(
|
|
48
|
+
"AZURE_VAULT_URL must be set when using the Azure provider."
|
|
49
|
+
)
|
|
50
|
+
return AzureKeyVault(vault_url=vault_url)
|
|
51
|
+
|
|
52
|
+
# You can easily add more providers here in the future
|
|
53
|
+
# elif provider == "gcp":
|
|
54
|
+
# return GCPSecretManager()
|
|
55
|
+
|
|
56
|
+
else:
|
|
57
|
+
raise SecretProviderError(
|
|
58
|
+
f"Unsupported secret provider: '{provider}'. Supported providers are: aws, azure."
|
|
59
|
+
)
|