dataflow-core 2.1.6__tar.gz → 2.1.18rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dataflow-core might be problematic. Click here for more details.
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/PKG-INFO +6 -1
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/dataflowhubauthenticator.py +47 -60
- dataflow_core-2.1.18rc2/dataflow/dataflow.py +423 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/environment.py +62 -33
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/__init__.py +9 -4
- dataflow_core-2.1.18rc2/dataflow/models/app_types.py +15 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/connection.py +5 -4
- dataflow_core-2.1.18rc2/dataflow/models/dataflow_zone.py +18 -0
- dataflow_core-2.1.18rc2/dataflow/models/environment.py +125 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/git_ssh.py +2 -1
- dataflow_core-2.1.18rc2/dataflow/models/org_associations.py +38 -0
- dataflow_core-2.1.18rc2/dataflow/models/organization.py +78 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/pinned_projects.py +2 -2
- dataflow_core-2.1.18rc2/dataflow/models/pod_activity.py +17 -0
- dataflow_core-2.1.18rc2/dataflow/models/pod_session_history.py +16 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/project_details.py +9 -6
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/recent_project_studio.py +1 -1
- dataflow_core-2.1.18rc2/dataflow/models/role.py +35 -0
- dataflow_core-2.1.18rc2/dataflow/models/role_server.py +11 -0
- dataflow_core-2.1.18rc2/dataflow/models/role_zone.py +22 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/server_config.py +16 -8
- dataflow_core-2.1.18rc2/dataflow/models/team.py +23 -0
- dataflow_core-2.1.18rc2/dataflow/models/user.py +68 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/user_team.py +1 -4
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/variables.py +13 -4
- dataflow_core-2.1.18rc2/dataflow/schemas/connection.py +87 -0
- dataflow_core-2.1.18rc2/dataflow/schemas/git_ssh.py +50 -0
- dataflow_core-2.1.18rc2/dataflow/schemas/secret.py +47 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/scripts/clone_environment.sh +2 -1
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/scripts/create_environment.sh +4 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/__init__.py +13 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/factory.py +65 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/interface.py +22 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/__init__.py +0 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/aws_manager.py +164 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/azure_manager.py +185 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/providers/gcp_manager.py +332 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/service.py +164 -0
- dataflow_core-2.1.18rc2/dataflow/secrets_manager/utils.py +58 -0
- dataflow_core-2.1.18rc2/dataflow/utils/__init__.py +0 -0
- dataflow_core-2.1.18rc2/dataflow/utils/exceptions.py +112 -0
- dataflow_core-2.1.18rc2/dataflow/utils/get_current_user.py +60 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/PKG-INFO +6 -1
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/SOURCES.txt +25 -5
- dataflow_core-2.1.18rc2/dataflow_core.egg-info/requires.txt +10 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/top_level.txt +1 -0
- dataflow_core-2.1.18rc2/dfmigration/__init__.py +0 -0
- dataflow_core-2.1.18rc2/dfmigration/env.py +45 -0
- dataflow_core-2.1.18rc2/dfmigration/versions/001_initial_baseline_migration.py +20 -0
- dataflow_core-2.1.18rc2/dfmigration/versions/__init__.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/setup.py +8 -3
- dataflow_core-2.1.6/dataflow/dataflow.py +0 -199
- dataflow_core-2.1.6/dataflow/models/app_types.py +0 -10
- dataflow_core-2.1.6/dataflow/models/environment.py +0 -75
- dataflow_core-2.1.6/dataflow/models/role.py +0 -19
- dataflow_core-2.1.6/dataflow/models/role_server.py +0 -14
- dataflow_core-2.1.6/dataflow/models/runtime.py +0 -11
- dataflow_core-2.1.6/dataflow/models/team.py +0 -17
- dataflow_core-2.1.6/dataflow/models/user.py +0 -30
- dataflow_core-2.1.6/dataflow/models/user_environment.py +0 -16
- dataflow_core-2.1.6/dataflow/utils/aws_secrets_manager.py +0 -57
- dataflow_core-2.1.6/dataflow/utils/get_current_user.py +0 -35
- dataflow_core-2.1.6/dataflow/utils/json_handler.py +0 -33
- dataflow_core-2.1.6/dataflow_core.egg-info/requires.txt +0 -5
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/README.md +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/__init__.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/dataflowairflowauthenticator.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/authenticator/dataflowsupersetauthenticator.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/__init__.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/configuration.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/database_manager.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/db.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/blacklist_library.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/environment_status.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/recent_projects.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/models/session.py +0 -0
- {dataflow_core-2.1.6/dataflow/utils → dataflow_core-2.1.18rc2/dataflow/schemas}/__init__.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/scripts/update_environment.sh +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow/utils/logger.py +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/dependency_links.txt +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/dataflow_core.egg-info/entry_points.txt +0 -0
- {dataflow_core-2.1.6 → dataflow_core-2.1.18rc2}/setup.cfg +0 -0
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataflow-core
|
|
3
|
-
Version: 2.1.6
|
|
3
|
+
Version: 2.1.18rc2
|
|
4
4
|
Summary: Dataflow core package
|
|
5
5
|
Author: Dataflow
|
|
6
6
|
Author-email:
|
|
7
7
|
Requires-Dist: sqlalchemy
|
|
8
|
+
Requires-Dist: alembic
|
|
8
9
|
Requires-Dist: boto3
|
|
9
10
|
Requires-Dist: psycopg2-binary
|
|
10
11
|
Requires-Dist: pymysql
|
|
11
12
|
Requires-Dist: requests
|
|
13
|
+
Requires-Dist: azure-identity
|
|
14
|
+
Requires-Dist: azure-keyvault-secrets
|
|
15
|
+
Requires-Dist: google-auth
|
|
16
|
+
Requires-Dist: google-cloud-secret-manager
|
|
12
17
|
Dynamic: author
|
|
13
18
|
Dynamic: requires-dist
|
|
14
19
|
Dynamic: summary
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import uuid
|
|
3
|
-
import re
|
|
1
|
+
import os, uuid, re, hashlib, secrets
|
|
4
2
|
from datetime import datetime, timedelta
|
|
5
3
|
from zoneinfo import ZoneInfo
|
|
6
4
|
from traitlets import Bool, Unicode
|
|
@@ -8,7 +6,8 @@ from jupyterhub.auth import Authenticator
|
|
|
8
6
|
from oauthenticator.google import GoogleOAuthenticator
|
|
9
7
|
from oauthenticator.azuread import AzureAdOAuthenticator
|
|
10
8
|
from dataflow.db import get_db
|
|
11
|
-
from dataflow.models import user as m_user, session as m_session
|
|
9
|
+
from dataflow.models import user as m_user, session as m_session
|
|
10
|
+
from sqlalchemy import or_
|
|
12
11
|
|
|
13
12
|
class DataflowBaseAuthenticator(Authenticator):
|
|
14
13
|
enable_dataflow_auth = Bool(True, config=True, help="Enable username/password authentication")
|
|
@@ -28,7 +27,7 @@ class DataflowBaseAuthenticator(Authenticator):
|
|
|
28
27
|
return str(uuid.uuid4())
|
|
29
28
|
|
|
30
29
|
def set_session_cookie(self, handler, session_id):
|
|
31
|
-
expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=
|
|
30
|
+
expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=60)
|
|
32
31
|
host = handler.request.host
|
|
33
32
|
domain = '.'.join(host.split('.')[-2:]) if len(host.split('.')) >= 2 else host
|
|
34
33
|
handler.set_cookie(
|
|
@@ -44,19 +43,12 @@ class DataflowBaseAuthenticator(Authenticator):
|
|
|
44
43
|
self.log.info(f"Set session cookie: dataflow_session={session_id} for host={host}")
|
|
45
44
|
|
|
46
45
|
def get_or_create_session(self, user_id):
|
|
47
|
-
existing_session = (
|
|
48
|
-
self.db.query(m_session.Session)
|
|
49
|
-
.filter(m_session.Session.user_id == str(user_id))
|
|
50
|
-
.first()
|
|
51
|
-
)
|
|
52
|
-
if existing_session:
|
|
53
|
-
self.log.info(f"Reusing existing session: {existing_session.session_id}")
|
|
54
|
-
return existing_session.session_id
|
|
55
46
|
session_id = self.generate_session_id()
|
|
56
47
|
while self.db.query(m_session.Session).filter(
|
|
57
48
|
m_session.Session.session_id == session_id
|
|
58
49
|
).first():
|
|
59
50
|
session_id = self.generate_session_id()
|
|
51
|
+
|
|
60
52
|
db_item = m_session.Session(user_id=user_id, session_id=session_id)
|
|
61
53
|
self.db.add(db_item)
|
|
62
54
|
self.db.commit()
|
|
@@ -77,42 +69,28 @@ class DataflowBaseAuthenticator(Authenticator):
|
|
|
77
69
|
|
|
78
70
|
return super().check_blocked_users(username, authenticated)
|
|
79
71
|
|
|
80
|
-
def get_applicant_role_id(self):
|
|
81
|
-
"""Get the role ID for 'Applicant' role"""
|
|
82
|
-
try:
|
|
83
|
-
applicant_role = (
|
|
84
|
-
self.db.query(m_role.Role)
|
|
85
|
-
.filter(m_role.Role.name == "Applicant")
|
|
86
|
-
.first()
|
|
87
|
-
)
|
|
88
|
-
if applicant_role:
|
|
89
|
-
return applicant_role.id
|
|
90
|
-
else:
|
|
91
|
-
self.log.warning("Applicant role not found in database")
|
|
92
|
-
return None
|
|
93
|
-
except Exception as e:
|
|
94
|
-
self.log.error(f"Error getting Applicant role: {str(e)}")
|
|
95
|
-
return None
|
|
96
|
-
|
|
97
72
|
def extract_username_from_email(self, email):
    """Return the text of *email* before its first '@'.

    A value that contains no '@' is returned unchanged.
    """
    if '@' not in email:
        return email
    local_part, _, _ = email.partition('@')
    return local_part
|
|
102
77
|
|
|
78
|
+
def generate_secure_password(self):
    """Generate a random, unguessable password string.

    Returns:
        str: 64 lowercase hexadecimal characters (256 bits of randomness).

    The value comes straight from the ``secrets`` CSPRNG. Hashing a UUID
    plus a salt, as was done before, adds no entropy; the output format
    (64 hex characters) is unchanged.
    """
    return secrets.token_hex(32)
|
|
84
|
+
|
|
103
85
|
def create_new_user(self, email, first_name=None, last_name=None):
|
|
104
86
|
"""Create a new user with Applicant role"""
|
|
105
87
|
try:
|
|
106
|
-
role_id = self.get_applicant_role_id()
|
|
107
|
-
if not role_id:
|
|
108
|
-
self.log.error("Cannot create user: Applicant role not found")
|
|
109
|
-
return None
|
|
110
|
-
|
|
111
88
|
username = self.extract_username_from_email(email)
|
|
112
|
-
username = re.sub(r'[^
|
|
89
|
+
username = re.sub(r'[^a-z0-9]', '', username.lower())
|
|
113
90
|
if not username:
|
|
114
91
|
self.log.error("Cannot create user: Username is empty")
|
|
115
92
|
return None
|
|
93
|
+
|
|
116
94
|
existing_user = (
|
|
117
95
|
self.db.query(m_user.User)
|
|
118
96
|
.filter(m_user.User.user_name == username)
|
|
@@ -122,7 +100,7 @@ class DataflowBaseAuthenticator(Authenticator):
|
|
|
122
100
|
counter = 1
|
|
123
101
|
original_username = username
|
|
124
102
|
while existing_user:
|
|
125
|
-
username = f"{original_username}
|
|
103
|
+
username = f"{original_username}{counter}"
|
|
126
104
|
existing_user = (
|
|
127
105
|
self.db.query(m_user.User)
|
|
128
106
|
.filter(m_user.User.user_name == username)
|
|
@@ -130,14 +108,13 @@ class DataflowBaseAuthenticator(Authenticator):
|
|
|
130
108
|
)
|
|
131
109
|
counter += 1
|
|
132
110
|
|
|
111
|
+
secure_password = self.generate_secure_password()
|
|
133
112
|
new_user = m_user.User(
|
|
134
113
|
user_name=username,
|
|
135
114
|
first_name=first_name or username,
|
|
136
115
|
last_name=last_name or "",
|
|
137
116
|
email=email,
|
|
138
|
-
|
|
139
|
-
active='Y',
|
|
140
|
-
password='user@123',
|
|
117
|
+
password=secure_password,
|
|
141
118
|
)
|
|
142
119
|
|
|
143
120
|
self.db.add(new_user)
|
|
@@ -155,22 +132,29 @@ class DataflowBaseAuthenticator(Authenticator):
|
|
|
155
132
|
async def authenticate_dataflow(self, handler, data):
|
|
156
133
|
if not (self.enable_dataflow_auth and isinstance(data, dict) and data.get("username") and data.get("password")):
|
|
157
134
|
return None
|
|
158
|
-
|
|
135
|
+
user_name_or_email = data["username"]
|
|
159
136
|
password = data["password"]
|
|
160
|
-
self.log.info(f"Attempting Dataflow authentication for user: {
|
|
137
|
+
self.log.info(f"Attempting Dataflow authentication for user: {user_name_or_email}")
|
|
161
138
|
try:
|
|
162
139
|
user = (
|
|
163
140
|
self.db.query(m_user.User)
|
|
164
|
-
.filter(
|
|
141
|
+
.filter(
|
|
142
|
+
or_(
|
|
143
|
+
m_user.User.email == user_name_or_email,
|
|
144
|
+
m_user.User.user_name == user_name_or_email
|
|
145
|
+
)
|
|
146
|
+
)
|
|
165
147
|
.first()
|
|
166
148
|
)
|
|
149
|
+
|
|
167
150
|
if not user or user.password != password:
|
|
168
|
-
self.log.warning(f"Dataflow authentication failed for user: {
|
|
151
|
+
self.log.warning(f"Dataflow authentication failed for user: {user_name_or_email}")
|
|
169
152
|
return None
|
|
153
|
+
|
|
170
154
|
session_id = self.get_or_create_session(user.user_id)
|
|
171
155
|
self.set_session_cookie(handler, session_id)
|
|
172
|
-
self.log.info(f"Dataflow authentication successful for user: {
|
|
173
|
-
return {"name":
|
|
156
|
+
self.log.info(f"Dataflow authentication successful for user: {user.user_name}")
|
|
157
|
+
return {"name": user.user_name, "session_id": session_id, "auth_state": {}}
|
|
174
158
|
except Exception as e:
|
|
175
159
|
self.log.error(f"Dataflow authentication error: {str(e)}")
|
|
176
160
|
return None
|
|
@@ -248,7 +232,11 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
|
|
|
248
232
|
azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
|
|
249
233
|
azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
|
|
250
234
|
azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
|
|
251
|
-
|
|
235
|
+
dataflow_oauth_type = Unicode(
|
|
236
|
+
default_value="google",
|
|
237
|
+
config=True,
|
|
238
|
+
help="The OAuth provider type for DataflowHub (e.g., github, google)"
|
|
239
|
+
)
|
|
252
240
|
def __init__(self, **kwargs):
|
|
253
241
|
super().__init__(**kwargs)
|
|
254
242
|
self.client_id = self.azure_client_id
|
|
@@ -270,32 +258,30 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
|
|
|
270
258
|
if not user:
|
|
271
259
|
self.log.warning("Azure AD OAuth authentication failed: No user data returned")
|
|
272
260
|
return None
|
|
273
|
-
|
|
274
|
-
|
|
261
|
+
|
|
262
|
+
auth_state = user.get("auth_state", {})
|
|
263
|
+
user_info = auth_state.get("user", {}) if auth_state else {}
|
|
264
|
+
email = user_info.get("upn")
|
|
275
265
|
if not email:
|
|
276
|
-
self.log.warning("Azure AD OAuth authentication failed: No
|
|
266
|
+
self.log.warning("Azure AD OAuth authentication failed: No upn in user data")
|
|
277
267
|
return None
|
|
278
|
-
|
|
268
|
+
|
|
279
269
|
db_user = (
|
|
280
270
|
self.db.query(m_user.User)
|
|
281
271
|
.filter(m_user.User.email == email)
|
|
282
272
|
.first()
|
|
283
273
|
)
|
|
284
|
-
|
|
274
|
+
|
|
285
275
|
if not db_user:
|
|
286
276
|
self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
|
|
287
|
-
# Extract additional info from user data if available
|
|
288
|
-
auth_state = user.get("auth_state", {})
|
|
289
|
-
user_info = auth_state.get("user", {}) if auth_state else {}
|
|
290
277
|
|
|
291
|
-
first_name = user_info.get("
|
|
292
|
-
last_name = user_info.get("family_name") or user.get("family_name")
|
|
278
|
+
first_name = user_info.get("name") or user.get("name")
|
|
293
279
|
|
|
294
|
-
db_user = self.create_new_user(email, first_name, last_name)
|
|
280
|
+
db_user = self.create_new_user(email, first_name, last_name=None)
|
|
295
281
|
if not db_user:
|
|
296
282
|
self.log.error(f"Failed to create new user for email: {email}")
|
|
297
283
|
return None
|
|
298
|
-
|
|
284
|
+
|
|
299
285
|
username = db_user.user_name
|
|
300
286
|
session_id = self.get_or_create_session(db_user.user_id)
|
|
301
287
|
self.set_session_cookie(handler, session_id)
|
|
@@ -305,13 +291,14 @@ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticato
|
|
|
305
291
|
"session_id": session_id,
|
|
306
292
|
"auth_state": user.get("auth_state", {})
|
|
307
293
|
}
|
|
294
|
+
|
|
308
295
|
except Exception as e:
|
|
309
296
|
self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
|
|
310
297
|
return None
|
|
311
298
|
finally:
|
|
312
299
|
self.db.close()
|
|
313
300
|
|
|
314
|
-
auth_type = os.environ.get("
|
|
301
|
+
auth_type = os.environ.get("DATAFLOW_OAUTH_TYPE", "google")
|
|
315
302
|
|
|
316
303
|
if auth_type == "google":
|
|
317
304
|
BaseAuthenticator = DataflowGoogleAuthenticator
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
import os, requests
|
|
2
|
+
from .database_manager import DatabaseManager
|
|
3
|
+
import json
|
|
4
|
+
import base64
|
|
5
|
+
from .configuration import ConfigurationManager
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Dataflow:
    """
    Dataflow class to interact with Dataflow services.

    Every public method resolves its endpoint URL from the ``auth`` section
    of the Dataflow auth config file and issues an HTTP GET against it.
    When the ``RUNTIME`` environment variable is set, the ``*_ui_api``
    endpoints are used and ``runtime``/``slug``/``org_id`` are sent as query
    parameters; otherwise the ``*_manager_api`` endpoints are used.
    """

    # Config file that maps endpoint names to URLs.
    _AUTH_CONFIG_PATH = '/dataflow/app/auth_config/dataflow_auth.cfg'

    # Seconds to wait for any Dataflow HTTP endpoint before giving up.
    # Without a timeout a stalled service would block the caller forever.
    REQUEST_TIMEOUT = 30

    @staticmethod
    def _json_parse(value):
        """
        Decode a JSON string, unwrapping one level of double encoding.

        Args:
            value: Text to decode.

        Returns:
            The decoded object. If the decoded object is itself a string it
            is decoded once more when possible. ``value`` is returned
            unchanged when it is not valid JSON or not a string at all.
        """
        try:
            result = json.loads(value)
        except (json.JSONDecodeError, TypeError):
            return value

        if not isinstance(result, str):
            return result

        # The payload was a JSON string; it may wrap another JSON document.
        try:
            return json.loads(result)
        except json.JSONDecodeError:
            return result

    @staticmethod
    def _env_context():
        """Return ``(host_name, runtime, slug, org_id)`` from the environment."""
        return (
            os.environ.get("HOSTNAME", ""),
            os.environ.get("RUNTIME"),
            os.environ.get("SLUG"),
            os.environ.get("ORGANIZATION"),
        )

    @staticmethod
    def _is_success(response, tag, noun, name):
        """
        Classify the HTTP status of a lookup response.

        Args:
            response: Response object exposing ``status_code`` and
                ``raise_for_status()``.
            tag (str): Method label used as the log prefix.
            noun (str): Kind of item being fetched, used in log messages.
            name (str): Name of the item being fetched.

        Returns:
            bool: True when the body should be parsed, False when the caller
            should return None (404, other 4xx, or an unexpected status).

        Raises:
            requests.exceptions.HTTPError: For 5xx responses, via
                ``raise_for_status()``.
        """
        status = response.status_code
        if status == 404:
            return False
        if status >= 500:
            response.raise_for_status()
        elif status >= 400:
            print(f"[{tag}] Client error {status} for {noun} '{name}'")
            return False
        elif status != 200:
            print(f"[{tag}] Unexpected status {status} for {noun} '{name}'")
            return False
        return True

    @staticmethod
    def _write_secret_file(secrets_dir, filename, file_content):
        """
        Write a secret's content to ``secrets_dir`` and return the file path.

        Args:
            secrets_dir: Directory that receives the file (must exist).
            filename (str): File name reported by the secrets API. Only its
                final path component is used.
            file_content (str): Raw text, or Base64 text of the file bytes.

        Returns:
            str: Path of the written file.

        Raises:
            ValueError: If ``filename`` has no usable final component.
        """
        # The filename comes from a remote response: keep only the last
        # component so it can never escape secrets_dir ("../x", "/etc/x").
        safe_name = os.path.basename(str(filename))
        if safe_name in ("", ".", ".."):
            raise ValueError(f"Invalid secret filename: {filename!r}")

        file_path = os.path.join(str(secrets_dir), safe_name)

        try:
            payload = base64.b64decode(file_content, validate=True)
        except (ValueError, TypeError):
            # Not Base64: store the content as plain text.
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(file_content)
        else:
            # Valid Base64: always store the decoded bytes. Writing bytes
            # covers text and binary alike, including payloads whose first
            # bytes look printable but that are not valid UTF-8 further in.
            with open(file_path, 'wb') as f:
                f.write(payload)

        # NOTE(review): the file is created with default permissions under
        # a shared /tmp location - consider restricting to 0o600.
        return file_path

    def _parse_response_data(self, response):
        """
        Parse response data based on datatype field or fallback to raw value.

        Args:
            response: Response object whose ``json()`` yields a dict.

        Returns:
            The ``value`` entry, JSON-decoded when ``datatype`` is ``json``;
            an empty string when ``value`` is absent.

        Raises:
            ValueError: If the response body is not a JSON object.
        """
        data = response.json()
        if not isinstance(data, dict):
            raise ValueError("Internal Dataflow Error!")

        value = data.get('value', '')
        if data.get('datatype') == 'json':
            return self._json_parse(value)
        return value

    def auth(self, session_id: str):
        """
        Retrieve and return user information using their session ID.

        Args:
            session_id (str): User's session ID from cookies

        Returns:
            dict: User information including username, name, email, and role.
            For a non-200 response the decoded response body is returned
            instead. If anything raises, the exception object itself is
            returned (not raised), so callers must check the result type.
        """
        try:
            dataflow_config = ConfigurationManager(self._AUTH_CONFIG_PATH)
            auth_api = dataflow_config.get_config_value('auth', 'ui_auth_api')
            response = requests.get(
                auth_api,
                cookies={"dataflow_session": session_id, "jupyterhub-hub-login": ""},
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code != 200:
                return response.json()

            user_data = response.json()
            return {
                "user_name": user_data["user_name"],
                "first_name": user_data["first_name"],
                "last_name": user_data.get("last_name") or "",
                "email": user_data["email"],
                "role": user_data["base_role"],
            }

        except Exception as e:
            # Existing contract: errors are handed back as a value.
            return e

    def variable(self, variable_name: str):
        """
        Retrieve a Dataflow variable.

        Args:
            variable_name (str): Name of the variable to retrieve

        Returns:
            str or None: Variable value if found, None otherwise

        Raises:
            RuntimeError: If the HTTP request fails or the server answers
                with a 5xx status.
        """
        try:
            host_name, runtime, slug, org_id = self._env_context()

            dataflow_config = ConfigurationManager(self._AUTH_CONFIG_PATH)
            query_params = {
                "key": variable_name,
            }

            if runtime and slug:
                variable_api = dataflow_config.get_config_value("auth", "variable_ui_api")
                query_params["runtime"] = runtime
                query_params["slug"] = slug
                query_params["org_id"] = org_id
            elif host_name:
                variable_api = dataflow_config.get_config_value("auth", "variable_manager_api")
            else:
                raise Exception("Cannot run dataflow methods here!")

            if not variable_api:
                print("[Dataflow.variable] Variable Unreachable")
                return None

            response = requests.get(
                variable_api, params=query_params, timeout=self.REQUEST_TIMEOUT
            )
            if not self._is_success(response, "Dataflow.variable", "variable", variable_name):
                return None

            return self._parse_response_data(response)

        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"[Dataflow.variable] Failed to fetch variable '{variable_name}'") from e

        except Exception as e:
            print(f"[Dataflow.variable] Exception occurred: {e}")
            return None

    def secret(self, secret_name: str):
        """
        Retrieve a Dataflow secret value.

        Args:
            secret_name (str): Name of the secret to retrieve

        Returns:
            str or None: Secret value if found, None otherwise

        Raises:
            RuntimeError: If the HTTP request fails or the server answers
                with a 5xx status.
        """
        try:
            _, runtime, slug, org_id = self._env_context()

            dataflow_config = ConfigurationManager(self._AUTH_CONFIG_PATH)
            query_params = {
                "key": secret_name
            }

            if runtime:
                secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
                query_params["runtime"] = runtime
                query_params["slug"] = slug
                query_params["org_id"] = org_id
            else:
                secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")

            if not secret_api:
                print("[Dataflow.secret] Secret API Unreachable")
                return None

            response = requests.get(
                secret_api, params=query_params, timeout=self.REQUEST_TIMEOUT
            )
            if not self._is_success(response, "Dataflow.secret", "secret", secret_name):
                return None

            return self._parse_response_data(response)

        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"[Dataflow.secret] Failed to fetch secret '{secret_name}'") from e
        except Exception as e:
            print(f"[Dataflow.secret] Exception occurred: {e}")
            return None

    def secret_file(self, secret_name: str):
        """
        Retrieve a Dataflow secret file.

        Args:
            secret_name (str): Name of the secret to retrieve

        Returns:
            str or None: With ``RUNTIME`` set, the path of the file written
            under ``/tmp/secrets``; otherwise the secret's raw value. None
            when the secret is missing, is not a file secret, or an error
            other than a request failure occurs.

        Raises:
            RuntimeError: If the HTTP request fails or the server answers
                with a 5xx status.
        """
        try:
            _, runtime, slug, org_id = self._env_context()

            dataflow_config = ConfigurationManager(self._AUTH_CONFIG_PATH)
            query_params = {
                "key": secret_name
            }

            if runtime:
                secret_api = dataflow_config.get_config_value("auth", "secret_ui_api")
                query_params["runtime"] = runtime
                query_params["slug"] = slug
                query_params["org_id"] = org_id
            else:
                secret_api = dataflow_config.get_config_value("auth", "secret_manager_api")

            if not secret_api:
                print("[Dataflow.secret_file] Secret API Unreachable")
                return None

            response = requests.get(
                secret_api, params=query_params, timeout=self.REQUEST_TIMEOUT
            )
            if not self._is_success(response, "Dataflow.secret_file", "secret", secret_name):
                return None

            response_data = response.json()
            filename = response_data.get('filename')
            if not filename:
                print(f"[Dataflow.secret_file] No file found for secret '{secret_name}'! If it is a non-file secret, please use the 'secret' method.")
                return None

            # For non-runtime mode, return the value as-is
            if not runtime:
                return response_data.get('value')

            # For runtime mode, create the file and return its path
            secrets_dir = "/tmp/secrets"
            os.makedirs(secrets_dir, exist_ok=True)

            file_content = response_data.get('value')
            if not file_content:
                print(f"[Dataflow.secret_file] Missing filename or content for secret '{secret_name}'")
                return None

            return self._write_secret_file(secrets_dir, filename, file_content)

        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"[Dataflow.secret_file] Failed to fetch secret '{secret_name}'") from e
        except Exception as e:
            print(f"[Dataflow.secret_file] Exception occurred: {e}")
            return None

    def connection(self, conn_id: str, mode="session"):
        """
        Connects with a Dataflow connection.

        Args:
            conn_id (str): Connection identifier
            mode (str): Return type - "session" (default), "engine", "url"
                or "dict"

        Returns:
            Session, Engine, str or dict: a session or engine obtained from
            ``DatabaseManager``, the connection URL string, or the raw
            connection details, depending on ``mode``.

        Raises:
            RuntimeError: For every failure (request error, missing
                connection, bad status, unsupported mode, missing fields).
        """
        try:
            from urllib.parse import quote

            # HOSTNAME is optional: runtime mode does not need it, and the
            # explicit error below covers the case where neither is set.
            host_name, runtime, slug, org_id = self._env_context()

            dataflow_config = ConfigurationManager(self._AUTH_CONFIG_PATH)
            query_params = {
                "conn_id": conn_id
            }

            if runtime:
                query_params["runtime"] = runtime
                query_params["org_id"] = org_id
                query_params["slug"] = slug
                connection_api = dataflow_config.get_config_value("auth", "connection_ui_api")
            elif host_name:
                connection_api = dataflow_config.get_config_value("auth", "connection_manager_api")
            else:
                raise Exception("Cannot run dataflow methods here! HOSTNAME or RUNTIME env variable not set.")

            response = requests.get(
                connection_api, params=query_params, timeout=self.REQUEST_TIMEOUT
            )

            if response.status_code == 404:
                raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")
            elif response.status_code >= 500:
                response.raise_for_status()
            elif response.status_code >= 400:
                raise RuntimeError(f"[Dataflow.connection] Client error {response.status_code} for connection '{conn_id}'")
            elif response.status_code != 200:
                raise RuntimeError(f"[Dataflow.connection] Unexpected status {response.status_code} for connection '{conn_id}'")

            connection_details = response.json()

            if not connection_details:
                raise RuntimeError(f"[Dataflow.connection] Connection '{conn_id}' not found!")

            if mode == "dict":
                return dict(connection_details)

            conn_type = connection_details['conn_type'].lower()
            username = connection_details['login']
            password = connection_details.get('password', '')
            host = connection_details['host']
            port = connection_details['port']
            database = connection_details.get('schemas', '')

            # Percent-encode credentials so characters such as '@', ':' or
            # '/' cannot be mistaken for URL delimiters.
            quoted_user = quote(str(username), safe="")
            if password:
                user_info = f"{quoted_user}:{quote(str(password), safe='')}@"
            else:
                user_info = f"{quoted_user}@"
            db_info = f"/{database}" if database else ""

            connection_string = f"{conn_type}://{user_info}{host}:{port}{db_info}"

            extra = connection_details.get('extra', '')
            if extra:
                try:
                    extra_params = json.loads(extra)
                    if extra_params:
                        extra_query = "&".join(f"{key}={value}" for key, value in extra_params.items())
                        connection_string += f"?{extra_query}"
                except json.JSONDecodeError:
                    # If 'extra' is not valid JSON, skip adding extra parameters
                    pass

            if mode == "url":
                return connection_string

            connection_instance = DatabaseManager(connection_string)
            if mode == "engine":
                return connection_instance.get_engine()
            elif mode == "session":
                return next(connection_instance.get_session())
            else:
                raise ValueError(f"Unsupported mode: {mode}. Use 'session', 'engine', 'url', 'dict'.")

        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"[Dataflow.connection] Failed to fetch connection '{conn_id}'") from e

        except Exception as e:
            raise RuntimeError(f"[Dataflow.connection] Error connecting to '{conn_id}': {str(e)}") from e

    def variable_or_secret(self, key: str):
        """
        Retrieve a variable or secret by key.

        Args:
            key (str): Key of the variable or secret

        Returns:
            str or None: Value if found, None otherwise

        Raises:
            RuntimeError: If the HTTP request fails or the server answers
                with a 5xx status.
            Exception: Unlike ``variable`` and ``secret``, errors that are
                not request failures (for example neither ``RUNTIME`` nor
                ``HOSTNAME`` being set) are not swallowed and propagate.
        """
        try:
            host_name, runtime, slug, org_id = self._env_context()

            dataflow_config = ConfigurationManager(self._AUTH_CONFIG_PATH)
            query_params = {
                "key": key
            }

            if runtime:
                variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_ui_api")
                query_params["runtime"] = runtime
                query_params["slug"] = slug
                query_params["org_id"] = org_id
            elif host_name:
                variableorsecret_api = dataflow_config.get_config_value("auth", "variableorsecret_manager_api")
            else:
                raise Exception("Cannot run dataflow methods here!")

            if not variableorsecret_api:
                print("[Dataflow.variable_or_secret] Variable/Secret Unreachable")
                return None

            response = requests.get(
                variableorsecret_api, params=query_params, timeout=self.REQUEST_TIMEOUT
            )
            if not self._is_success(response, "Dataflow.variable_or_secret", "key", key):
                return None

            return self._parse_response_data(response)

        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"[Dataflow.variable_or_secret] Failed to fetch '{key}'") from e
|