dataflow-core 2.1.6__tar.gz → 2.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dataflow-core might be problematic.
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/PKG-INFO +3 -1
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/authenticator/dataflowhubauthenticator.py +19 -17
- dataflow_core-2.1.8/dataflow/dataflow.py +323 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/__init__.py +2 -1
- dataflow_core-2.1.8/dataflow/models/dataflow_zone.py +19 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/role.py +12 -2
- dataflow_core-2.1.8/dataflow/models/role_zone.py +17 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/user.py +2 -2
- dataflow_core-2.1.8/dataflow/schemas/connection.py +84 -0
- dataflow_core-2.1.8/dataflow/schemas/git_ssh.py +50 -0
- dataflow_core-2.1.8/dataflow/schemas/secret.py +44 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/__init__.py +13 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/factory.py +59 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/interface.py +22 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/providers/__init__.py +0 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/providers/aws_manager.py +164 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/providers/azure_manager.py +185 -0
- dataflow_core-2.1.8/dataflow/secrets_manager/service.py +156 -0
- dataflow_core-2.1.8/dataflow/utils/__init__.py +0 -0
- dataflow_core-2.1.8/dataflow/utils/exceptions.py +112 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/utils/get_current_user.py +2 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow_core.egg-info/PKG-INFO +3 -1
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow_core.egg-info/SOURCES.txt +14 -3
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow_core.egg-info/requires.txt +2 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/setup.py +4 -2
- dataflow_core-2.1.6/dataflow/dataflow.py +0 -199
- dataflow_core-2.1.6/dataflow/models/runtime.py +0 -11
- dataflow_core-2.1.6/dataflow/utils/aws_secrets_manager.py +0 -57
- dataflow_core-2.1.6/dataflow/utils/json_handler.py +0 -33
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/README.md +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/authenticator/__init__.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/authenticator/dataflowairflowauthenticator.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/authenticator/dataflowsupersetauthenticator.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/__init__.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/configuration.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/database_manager.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/db.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/environment.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/app_types.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/blacklist_library.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/connection.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/environment.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/environment_status.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/git_ssh.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/pinned_projects.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/project_details.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/recent_project_studio.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/recent_projects.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/role_server.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/server_config.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/session.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/team.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/user_environment.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/user_team.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/models/variables.py +0 -0
- {dataflow_core-2.1.6/dataflow/utils → dataflow_core-2.1.8/dataflow/schemas}/__init__.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/scripts/clone_environment.sh +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/scripts/create_environment.sh +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/scripts/update_environment.sh +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow/utils/logger.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow_core.egg-info/dependency_links.txt +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow_core.egg-info/entry_points.txt +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/dataflow_core.egg-info/top_level.txt +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataflow-core
-Version: 2.1.6
+Version: 2.1.8
 Summary: Dataflow core package
 Author: Dataflow
 Author-email:
@@ -9,6 +9,8 @@ Requires-Dist: boto3
 Requires-Dist: psycopg2-binary
 Requires-Dist: pymysql
 Requires-Dist: requests
+Requires-Dist: azure-identity
+Requires-Dist: azure-keyvault-secrets
 Dynamic: author
 Dynamic: requires-dist
 Dynamic: summary
@@ -109,7 +109,7 @@ class DataflowBaseAuthenticator(Authenticator):
             return None

         username = self.extract_username_from_email(email)
-        username = re.sub(r'[^
+        username = re.sub(r'[^a-z0-9]', '', username.lower())
         if not username:
             self.log.error("Cannot create user: Username is empty")
             return None
@@ -136,7 +136,6 @@ class DataflowBaseAuthenticator(Authenticator):
             last_name=last_name or "",
             email=email,
             role_id=role_id,
-            active='Y',
             password='user@123',
         )

@@ -248,7 +247,11 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticator):
     azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
     azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
     azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
-
+    dataflow_oauth_type = Unicode(
+        default_value="google",
+        config=True,
+        help="The OAuth provider type for DataflowHub (e.g., github, google)"
+    )
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.client_id = self.azure_client_id
@@ -270,48 +273,47 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticator):
             if not user:
                 self.log.warning("Azure AD OAuth authentication failed: No user data returned")
                 return None
-
-
+
+            auth_state = user.get("auth_state", {})
+            user_info = auth_state.get("user", {}) if auth_state else {}
+            email = user_info.get("upn")
             if not email:
-                self.log.warning("Azure AD OAuth authentication failed: No
+                self.log.warning("Azure AD OAuth authentication failed: No upn in user data")
                 return None
-
+
             db_user = (
                 self.db.query(m_user.User)
                 .filter(m_user.User.email == email)
                 .first()
             )
-
+
             if not db_user:
                 self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
-                # Extract additional info from user data if available
-                auth_state = user.get("auth_state", {})
-                user_info = auth_state.get("user", {}) if auth_state else {}

-                first_name = user_info.get("
-                last_name = user_info.get("family_name") or user.get("family_name")
+                first_name = user_info.get("name") or user.get("name")

-                db_user = self.create_new_user(email, first_name, last_name)
+                db_user = self.create_new_user(email, first_name, last_name=None)
                 if not db_user:
                     self.log.error(f"Failed to create new user for email: {email}")
                     return None
-
+
             username = db_user.user_name
             session_id = self.get_or_create_session(db_user.user_id)
             self.set_session_cookie(handler, session_id)
             self.log.info(f"Azure AD OAuth completed for user: {username}, session_id={session_id}")
             return {
-                "name":
+                "name": db_user.first_name,
                 "session_id": session_id,
                 "auth_state": user.get("auth_state", {})
             }
+
         except Exception as e:
             self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
             return None
         finally:
             self.db.close()

-auth_type = os.environ.get("
+auth_type = os.environ.get("DATAFLOW_OAUTH_TYPE", "google")

 if auth_type == "google":
     BaseAuthenticator = DataflowGoogleAuthenticator
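The release also makes the hub's OAuth provider switchable at two levels: the module-level `DATAFLOW_OAUTH_TYPE` environment variable (defaulting to "google") and the new `dataflow_oauth_type` configurable trait on the Azure authenticator. Below is a minimal, hedged sketch of how this might be wired into a `jupyterhub_config.py`; it assumes the module is importable as `authenticator.dataflowhubauthenticator` and that non-"google" values route to the matching authenticator class (only the "google" branch is visible in the hunk above), so treat the wiring as an assumption rather than documented behavior.

```python
# Hypothetical jupyterhub_config.py sketch (not from the package docs).
import os

# The module reads DATAFLOW_OAUTH_TYPE at import time; the fallback is "google".
os.environ.setdefault("DATAFLOW_OAUTH_TYPE", "azure")  # "azure" routing is assumed

from authenticator.dataflowhubauthenticator import DataflowAzureAuthenticator

c.JupyterHub.authenticator_class = DataflowAzureAuthenticator
c.DataflowAzureAuthenticator.azure_client_id = "..."        # placeholder values
c.DataflowAzureAuthenticator.azure_client_secret = "..."
c.DataflowAzureAuthenticator.azure_tenant_id = "..."
c.DataflowAzureAuthenticator.dataflow_oauth_type = "azure"  # new configurable trait in 2.1.8
```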
@@ -0,0 +1,323 @@
+import os, requests
+from .database_manager import DatabaseManager
+import json
+from .configuration import ConfigurationManager
+
+
+class Dataflow:
+    """
+    Dataflow class to interact with Dataflow services.
+    """
+    def auth(self, session_id: str):
+        """
+        Retrieve and return user information using their session ID.
+
+        Args:
+            session_id (str): User's session ID from cookies
+
+        Returns:
+            dict: User information including username, name, email, and role
+        """
+        try:
+            dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
+            auth_api = dataflow_config.get_config_value('auth', 'ui_auth_api')
+            response = requests.get(
+                auth_api,
+                cookies={"dataflow_session": session_id, "jupyterhub-hub-login": ""}
+            )
+
+            if response.status_code != 200:
+                return response.json()
+
+            user_data = response.json()
+            user_dict = {
+                "user_name": user_data["user_name"],
+                "first_name": user_data["first_name"],
+                "last_name": user_data["last_name"] if user_data.get("last_name") else "",
+                "email": user_data["email"],
+                "role": user_data["base_role"]
+            }
+            return user_dict
+
+        except Exception as e:
+            return e
+
+    def variable(self, variable_name: str):
+        """
+        Retrieve a Dataflow variable.
+
+        Args:
+            variable_name (str): Name of the variable to retrieve
+
+        Returns:
+            str or None: Variable value if found, None otherwise
+        """
+        try:
+            host_name = os.environ.get("HOSTNAME", "")
+            runtime = os.environ.get("RUNTIME")
+            slug = os.environ.get("SLUG")
+
+            dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
+
+            variable_api = None
+            if runtime and slug:
+                variable_api = dataflow_config.get_config_value("auth", "variable_ui_api")
+            elif host_name:
+                variable_api = dataflow_config.get_config_value("auth", "variable_manager_api")
+            else:
+                raise Exception("Cannot run dataflow methods here!")
+
+            if not variable_api:
+                print("[Dataflow.variable] Variable Unreachable")
+                return None
+
+            if runtime:
+                query_params = {
+                    "key": variable_name,
+                    "runtime": runtime,
+                    "slug": slug
+                }
+                response = requests.get(variable_api, params=query_params)
+                if response.status_code == 200:
+                    response_text = response.text.strip().strip('"')
+                    return response_text
+
+                query_params["slug"] = "global"
+                response = requests.get(variable_api, params=query_params)
+                if response.status_code == 200:
+                    response_text = response.text.strip().strip('"')
+                    return response_text
+                else:
+                    return None
+
+            query_params = {
+                "key": variable_name,
+            }
+            response = requests.get(variable_api, params=query_params)
+
+            # Handle different HTTP status codes gracefully
+            if response.status_code == 404:
+                return None  # Variable not found
+            elif response.status_code >= 500:
+                response.raise_for_status()  # Let server errors propagate
+            elif response.status_code >= 400:
+                print(f"[Dataflow.variable] Client error {response.status_code} for variable '{variable_name}'")
+                return None
+            elif response.status_code != 200:
+                print(f"[Dataflow.variable] Unexpected status {response.status_code} for variable '{variable_name}'")
+                return None
+
+            return response.text.strip().strip('"')
+
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"[Dataflow.variable] Failed to fetch variable '{variable_name}'") from e
+
+        except Exception as e:
+            print(f"[Dataflow.variable] Exception occurred: {e}")
+            return None
+
+    def secret(self, secret_name: str):
+        """
+        Retrieve a Dataflow secret value.
+
+        Args:
+            secret_name (str): Name of the secret to retrieve
+
+        Returns:
+            str or None: Secret value if found, None otherwise
+        """
+        try:
+            host_name = os.environ.get("HOSTNAME", "")
+            runtime = os.environ.get("RUNTIME")
+            slug = os.environ.get("SLUG")
+
+            dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
+            if runtime:
+                secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
+            else:
+                secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")
+            if not secret_api:
+                print("[Dataflow.secret] Secret API Unreachable")
+                return None
+
+            query_params = {
+                "key": secret_name
+            }
+
+            if runtime:
+                query_params["runtime"] = runtime
+            if slug:
+                query_params["slug"] = slug
+
+            response = requests.get(secret_api, params=query_params)
+
+            # Handle different HTTP status codes gracefully
+            if response.status_code == 404:
+                return None  # Secret not found
+            elif response.status_code >= 500:
+                response.raise_for_status()  # Let server errors propagate
+            elif response.status_code >= 400:
+                print(f"[Dataflow.secret] Client error {response.status_code} for secret '{secret_name}'")
+                return None
+            elif response.status_code != 200:
+                print(f"[Dataflow.secret] Unexpected status {response.status_code} for secret '{secret_name}'")
+                return None
+
+            return response.text.strip().strip('"')
+
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"[Dataflow.secret] Failed to fetch secret '{secret_name}'") from e
+        except Exception as e:
+            print(f"[Dataflow.secret] Exception occurred: {e}")
+            return None
+
+    def connection(self, conn_id: str, mode="session"):
+        """
+        Connects with a Dataflow connection.
+
+        Args:
+            conn_id (str): Connection identifier
+            mode (str): Return type - "session" (default) or "engine" or "url"
+
+        Returns:
+            Session or Engine: SQLAlchemy session or engine based on mode
+        """
+        try:
+            host_name = os.environ["HOSTNAME"]
+            runtime = os.environ.get("RUNTIME")
+            slug = os.environ.get("SLUG")
+
+            dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
+            if runtime:
+                connection_api = dataflow_config.get_config_value("auth", "connection_ui_api")
+            elif host_name:
+                connection_api = dataflow_config.get_config_value("auth", "connection_manager_api")
+            else:
+                raise Exception("Cannot run dataflow methods here! HOSTNAME or RUNTIME env variable not set.")
+
+            query_params = {
+                "conn_id": conn_id
+            }
+
+            if runtime:
+                query_params["runtime"] = runtime
+            if slug:
+                query_params["slug"] = slug
+
+            response = requests.get(connection_api, params=query_params)
+
+            # Handle different HTTP status codes gracefully
+            if response.status_code == 404:
+                raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")
+            elif response.status_code >= 500:
+                response.raise_for_status()  # Let server errors propagate
+            elif response.status_code >= 400:
+                raise RuntimeError(f"[Dataflow.connection] Client error {response.status_code} for connection '{conn_id}'")
+            elif response.status_code != 200:
+                raise RuntimeError(f"[Dataflow.connection] Unexpected status {response.status_code} for connection '{conn_id}'")
+
+            connection_details = response.json()
+
+            if not connection_details:
+                raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")
+
+            if mode == "dict":
+                with open('/home/jovyan/log.txt', 'w') as log_file:
+                    log_file.write(f"Connection details for {conn_id}: {connection_details}\n")
+                print(f"connection_details: {connection_details}")
+                return dict(connection_details)
+
+            conn_type = connection_details['conn_type'].lower()
+            username = connection_details['login']
+            password = connection_details.get('password', '')
+            host = connection_details['host']
+            port = connection_details['port']
+            database = connection_details.get('schemas', '')
+
+            user_info = f"{username}:{password}@" if password else f"{username}@"
+            db_info = f"/{database}" if database else ""
+
+            connection_string = f"{conn_type}://{user_info}{host}:{port}{db_info}"
+
+            extra = connection_details.get('extra', '')
+            if extra:
+                try:
+                    extra_params = json.loads(extra)
+                    if extra_params:
+                        extra_query = "&".join(f"{key}={value}" for key, value in extra_params.items())
+                        connection_string += f"?{extra_query}"
+                except json.JSONDecodeError:
+                    # If 'extra' is not valid JSON, skip adding extra parameters
+                    pass
+
+            if mode == "url":
+                return connection_string
+
+            connection_instance = DatabaseManager(connection_string)
+            if mode == "engine":
+                return connection_instance.get_engine()
+            elif mode == "session":
+                return next(connection_instance.get_session())
+            else:
+                raise ValueError(f"Unsupported mode: {mode}. Use 'session', 'engine', 'url'.")
+
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"[Dataflow.connection] Failed to fetch connection '{conn_id}'") from e
+
+        except Exception as e:
+            raise RuntimeError(f"[Dataflow.connection] Error connecting to '{conn_id}': {str(e)}") from e
+
+    def variable_or_secret(self, key: str):
+        """
+        Retrieve a variable or secret by key.
+
+        Args:
+            key (str): Key of the variable or secret
+
+        Returns:
+            str or None: Value if found, None otherwise
+        """
+        try:
+            host_name = os.environ.get("HOSTNAME", "")
+            runtime = os.environ.get("RUNTIME")
+            slug = os.environ.get("SLUG")
+
+            dataflow_config = ConfigurationManager('/dataflow/app/auth_config/dataflow_auth.cfg')
+            if runtime and slug:
+                variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_ui_api")
+                query_params = {
+                    "key": key,
+                    "runtime": runtime,
+                    "slug": slug
+                }
+            elif host_name:
+                variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_manager_api")
+                query_params = {
+                    "key": key
+                }
+            else:
+                raise Exception("Cannot run dataflow methods here!")
+
+            if not variableorsecret_api:
+                print("[Dataflow.variable_or_secret] Variable/Secret Unreachable")
+                return None
+
+            response = requests.get(variableorsecret_api, params=query_params)
+
+            # Handle different HTTP status codes gracefully
+            if response.status_code == 404:
+                return None  # Variable/secret not found
+            elif response.status_code >= 500:
+                response.raise_for_status()  # Let server errors propagate
+            elif response.status_code >= 400:
+                print(f"[Dataflow.variable_or_secret] Client error {response.status_code} for key '{key}'")
+                return None
+            elif response.status_code != 200:
+                print(f"[Dataflow.variable_or_secret] Unexpected status {response.status_code} for key '{key}'")
+                return None
+
+            response_text = response.text.strip().strip('"')
+            return response_text
+
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"[Dataflow.variable_or_secret] Failed to fetch '{key}'") from e
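The new dataflow/dataflow.py replaces the 199-line 2.1.6 module and exposes variable, secret, connection, and variable_or_secret lookups against the configured auth APIs. A hedged usage sketch follows, assuming the code runs inside a Dataflow-managed environment where HOSTNAME/RUNTIME/SLUG and /dataflow/app/auth_config/dataflow_auth.cfg are present and the class is importable from dataflow.dataflow; names and values are illustrative.

```python
# Hedged usage sketch (not from the package docs).
from dataflow.dataflow import Dataflow

df = Dataflow()

api_key = df.variable("MY_API_KEY")    # in runtime mode, falls back to the "global" slug on a miss
token = df.secret("MY_SECRET_TOKEN")   # returns None when the secret is not found (HTTP 404)

# connection() builds an SQLAlchemy URL from the stored connection and returns,
# depending on mode, a Session (default), an Engine, or the raw URL string.
engine = df.connection("analytics-db", mode="engine")  # "analytics-db" is a hypothetical conn_id
with engine.connect() as conn:
    rows = conn.exec_driver_sql("SELECT 1").fetchall()
```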
@@ -12,7 +12,8 @@ from .blacklist_library import BlacklistedLibrary
 from .environment_status import EnvironmentStatus
 from .session import Session
 from .server_config import ServerConfig, CustomServerConfig
-from .
+from .dataflow_zone import DataflowZone
+from .role_zone import RoleZone
 from .environment_status import EnvironmentStatus
 from .user_team import UserTeam
 from .role_server import RoleServer
@@ -0,0 +1,19 @@
+from sqlalchemy import Column, Integer, String, Boolean
+from sqlalchemy.orm import relationship
+from dataflow.db import Base
+
+class DataflowZone(Base):
+    __tablename__ = "DATAFLOW_ZONE"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    slug = Column(String, unique=True, nullable=False)
+    display_name = Column(String, nullable=False)
+    is_runtime = Column(Boolean, default=False)
+    subdomain = Column(String)
+    spark_enabled = Column(Boolean, default=False)
+    display_order = Column(Integer, default=0)
+
+    role_zone_assocs = relationship("RoleZone", back_populates="zone")
+
+    def __repr__(self):
+        return f"<DataflowZone(id={self.id}, slug='{self.slug}', display_name='{self.display_name}', display_order={self.display_order})>"
@@ -2,6 +2,12 @@
 from sqlalchemy import Column, Integer, String, Enum
 from sqlalchemy.orm import relationship
 from dataflow.db import Base
+import enum
+
+class BaseRoleField(enum.Enum):
+    admin = "admin"
+    user = "user"
+    applicant = "applicant"
 
 class Role(Base):
     """
@@ -13,7 +19,11 @@ class Role(Base):
     id = Column(Integer, primary_key=True, index=True, autoincrement=True, nullable=False)
     name = Column(String, unique=True, nullable=False)
     description = Column(String, nullable=True)
-    base_role = Column(Enum(
+    base_role = Column(Enum(BaseRoleField), nullable=False, default=BaseRoleField.user)
 
     users = relationship("User", back_populates="role_details", cascade="all, delete-orphan")
-    role_server_assocs = relationship("RoleServer", back_populates="role")
+    role_server_assocs = relationship("RoleServer", back_populates="role")
+    role_zone_assocs = relationship("RoleZone", back_populates="role")
+
+    def __repr__(self):
+        return f"<Role(id={self.id}, name='{self.name}', base_role='{self.base_role}')>"
@@ -0,0 +1,17 @@
+from typing import Dict, List, Optional
+from sqlalchemy import Column, Integer, ForeignKey, UniqueConstraint, Boolean
+from sqlalchemy.orm import relationship
+from dataflow.db import Base
+
+class RoleZone(Base):
+    __tablename__ = 'ROLE_ZONE'
+
+    role_id = Column(Integer, ForeignKey('ROLE.id', ondelete="CASCADE"), primary_key=True)
+    zone_id = Column(Integer, ForeignKey('DATAFLOW_ZONE.id', ondelete="CASCADE"), primary_key=True)
+    is_default = Column(Boolean, default=False, nullable=False)
+
+    role = relationship("Role", back_populates="role_zone_assocs")
+    zone = relationship("DataflowZone", back_populates="role_zone_assocs")
+
+    def __repr__(self):
+        return f"<RoleZone(role_id={self.role_id}, zone_id={self.zone_id}, is_default={self.is_default})>"
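Taken together, DataflowZone, the extended Role model, and the new RoleZone association form a many-to-many mapping between roles and zones, with is_default marking a role's default zone. A minimal sketch of how the tables relate, assuming an existing SQLAlchemy session bound to the dataflow.db metadata; the imports mirror the file paths added in 2.1.8, and the field values are illustrative.

```python
# Hedged sketch of the new role/zone association (values are placeholders).
from dataflow.models.role import Role, BaseRoleField
from dataflow.models.dataflow_zone import DataflowZone
from dataflow.models.role_zone import RoleZone

zone = DataflowZone(slug="runtime", display_name="Runtime", is_runtime=True, display_order=1)
role = Role(name="data-engineer", description="Engineering role", base_role=BaseRoleField.user)

# Composite-key association row marking this zone as the role's default zone.
assoc = RoleZone(role=role, zone=zone, is_default=True)

session.add_all([zone, role, assoc])  # `session` is an existing SQLAlchemy Session
session.commit()

print(role.role_zone_assocs[0].zone.slug)  # -> "runtime"
```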
@@ -1,5 +1,5 @@
 """models.py"""
-from sqlalchemy import Column, Integer, String, Boolean, LargeBinary,
+from sqlalchemy import Column, Integer, String, Boolean, LargeBinary, ForeignKey
 from sqlalchemy.orm import relationship
 from dataflow.db import Base
 
@@ -18,7 +18,7 @@ class User(Base):
     role_id = Column(Integer, ForeignKey('ROLE.id'), nullable=False)
     image = Column(LargeBinary)
     image_url = Column(String, nullable=True)
-    active = Column(
+    active = Column(Boolean, nullable=False, default=True)
     password = Column(String, nullable=False)
     active_env = Column(String)
     active_env_type = Column(String, nullable=True)
@@ -0,0 +1,84 @@
+"""schemas/connection.py"""
+
+from pydantic import BaseModel, field_validator
+from typing import Optional
+from datetime import datetime
+from enum import Enum
+
+
+class ConnectionType(str, Enum):
+    """Enum for supported connection types."""
+    POSTGRESQL = "PostgreSQL"
+    MYSQL = "MySQL"
+
+
+class ConnectionBase(BaseModel):
+    """Base connection model with common fields."""
+    conn_id: str
+    conn_type: ConnectionType
+    description: Optional[str] = None
+    host: str
+    schemas: Optional[str] = None
+    password: str
+    login: str
+    port: int
+    extra: Optional[str] = None
+
+    @field_validator("conn_id")
+    def validate_conn_id(cls, v) -> str:
+        import re
+        if not isinstance(v, str):
+            raise ValueError("Connection ID must be a string.")
+        if len(v) > 20:
+            raise ValueError("Connection ID must be at most 20 characters long.")
+        if not re.fullmatch(r"[A-Za-z0-9-]+", v):
+            raise ValueError(
+                "Connection ID can only contain letters, numbers, and hyphens (-)!"
+            )
+        return v
+
+    @field_validator("conn_type")
+    def validate_conn_type(cls, v) -> ConnectionType:
+        if isinstance(v, str):
+            try:
+                return ConnectionType(v)
+            except ValueError:
+                raise ValueError(f'conn_type must be one of {[e.value for e in ConnectionType]}')
+        return v
+
+
+class ConnectionSave(ConnectionBase):
+    """Model for creating a new connection."""
+    pass
+
+
+class ConnectionUpdate(BaseModel):
+    """Model for updating an existing connection."""
+    conn_type: Optional[ConnectionType] = None
+    description: Optional[str] = None
+    host: Optional[str] = None
+    schemas: Optional[str] = None
+    login: Optional[str] = None
+    password: Optional[str] = None
+    port: Optional[int] = None
+    extra: Optional[str] = None
+
+    @field_validator("conn_type")
+    def validate_conn_type(cls, v) -> Optional[ConnectionType]:
+        if v is None:
+            return v
+        if isinstance(v, str):
+            # Convert string to enum if needed
+            try:
+                return ConnectionType(v)
+            except ValueError:
+                raise ValueError(f'conn_type must be one of {[e.value for e in ConnectionType]}')
+        return v
+
+
+class ConnectionRead(ConnectionBase):
+    """Model for reading/displaying connection data."""
+    pass
+
+    class Config:
+        from_attributes = True
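The connection schema uses Pydantic v2 style validators (field_validator), restricting conn_id to at most 20 characters of letters, digits, and hyphens and coercing conn_type to the ConnectionType enum. A hedged validation sketch, with illustrative values only:

```python
# Hedged sketch: validating a connection payload with the new schema (assumes pydantic>=2).
from dataflow.schemas.connection import ConnectionSave, ConnectionType

payload = {
    "conn_id": "warehouse-01",     # <= 20 chars, letters/digits/hyphens only
    "conn_type": "PostgreSQL",     # coerced to ConnectionType.POSTGRESQL
    "host": "db.internal",
    "login": "etl_user",
    "password": "example-password",
    "port": 5432,
    "schemas": "analytics",
}

conn = ConnectionSave(**payload)
assert conn.conn_type is ConnectionType.POSTGRESQL

# An invalid conn_id (too long or with illegal characters) raises a pydantic ValidationError:
# ConnectionSave(**{**payload, "conn_id": "this_name_is_way_too_long!"})
```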
@@ -0,0 +1,50 @@
+"""schemas/git_ssh.py"""
+
+from pydantic import BaseModel, field_validator
+from typing import Optional, Literal
+from datetime import datetime
+
+
+class SSHBase(BaseModel):
+    """Base SSH key model with common fields."""
+    key_name: str
+    description: Optional[str] = None
+
+    @field_validator("key_name")
+    def validate_key_name(cls, v) -> str:
+        import re
+        if not isinstance(v, str):
+            raise ValueError("SSH key name must be a string.")
+        if len(v) > 20:
+            raise ValueError("SSH key name must be at most 20 characters long.")
+        if not re.fullmatch(r"[A-Za-z0-9-]+", v):
+            raise ValueError(
+                "SSH key name can only contain letters, numbers, and hyphens (-)!"
+            )
+        return v
+
+
+class SSHSave(SSHBase):
+    """Model for creating a new SSH key."""
+    public_key: str
+    private_key: str
+
+
+class SSHUpdate(BaseModel):
+    """Model for updating an existing SSH key."""
+    description: Optional[str] = None
+    public_key: Optional[str] = None
+    private_key: Optional[str] = None
+
+
+class SSHRead(SSHBase):
+    """Model for reading/displaying SSH key data."""
+    public_key: str
+    private_key: str
+    created_date: Optional[datetime] = None
+
+    class Config:
+        from_attributes = True
+
+    class Config:
+        from_attributes = True
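The git_ssh schemas follow the same pattern for SSH key records (note the duplicated Config block at the end of SSHRead, which ships as-is in 2.1.8). A hedged sketch with placeholder key material, assuming the module is importable as dataflow.schemas.git_ssh:

```python
# Hedged sketch: building an SSH key record with the new schemas.
# The key strings below are placeholder text, not a real key pair.
from dataflow.schemas.git_ssh import SSHSave, SSHRead

new_key = SSHSave(
    key_name="deploy-key-1",                 # <= 20 chars, letters/digits/hyphens
    description="Read-only deploy key",
    public_key="ssh-ed25519 AAAA... user@host",
    private_key="-----BEGIN OPENSSH PRIVATE KEY-----\n...\n-----END OPENSSH PRIVATE KEY-----",
)
print(new_key.key_name)

# With from_attributes enabled, SSHRead can be built straight from an ORM row (pydantic v2):
# record = SSHRead.model_validate(orm_git_ssh_row)
```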