dataflow-core 2.1.17__tar.gz → 2.1.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/PKG-INFO +1 -1
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/authenticator/dataflowairflowauthenticator.py +10 -2
- dataflow_core-2.1.20/authenticator/dataflowhubauthenticator.py +608 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/authenticator/dataflowsupersetauthenticator.py +11 -6
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/configuration.py +7 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/database_manager.py +23 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/db.py +4 -3
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/environment.py +20 -18
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/__init__.py +2 -0
- dataflow_core-2.1.20/dataflow/models/app_types.py +31 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/blacklist_library.py +3 -3
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/connection.py +19 -2
- dataflow_core-2.1.20/dataflow/models/dataflow_setting.py +14 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/dataflow_zone.py +15 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/environment.py +125 -4
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/environment_status.py +6 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/git_ssh.py +16 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/org_associations.py +35 -2
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/organization.py +62 -13
- dataflow_core-2.1.20/dataflow/models/otp.py +19 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/pinned_projects.py +13 -0
- dataflow_core-2.1.20/dataflow/models/pod_activity.py +30 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/pod_session_history.py +14 -1
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/project_details.py +26 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/recent_project_studio.py +16 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/recent_projects.py +7 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/role.py +20 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/role_server.py +8 -1
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/role_zone.py +17 -2
- dataflow_core-2.1.20/dataflow/models/server_config.py +77 -0
- dataflow_core-2.1.20/dataflow/models/session.py +22 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/team.py +11 -2
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/user.py +41 -7
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/user_team.py +11 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/variables.py +26 -2
- dataflow_core-2.1.20/dataflow/schemas/connection.py +129 -0
- dataflow_core-2.1.20/dataflow/schemas/git_ssh.py +84 -0
- dataflow_core-2.1.20/dataflow/schemas/secret.py +75 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/factory.py +6 -2
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/interface.py +3 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/aws_manager.py +55 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/azure_manager.py +55 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/service.py +8 -2
- dataflow_core-2.1.20/dataflow/secrets_manager/utils.py +58 -0
- dataflow_core-2.1.20/dataflow/utils/blocked_domains.py +4781 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/get_current_user.py +17 -3
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/PKG-INFO +1 -1
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/SOURCES.txt +5 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/env.py +10 -0
- dataflow_core-2.1.20/dfmigration/versions/002_user_onboarding_migration.py +65 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/setup.py +1 -1
- dataflow_core-2.1.17/authenticator/dataflowhubauthenticator.py +0 -309
- dataflow_core-2.1.17/dataflow/models/app_types.py +0 -15
- dataflow_core-2.1.17/dataflow/models/pod_activity.py +0 -16
- dataflow_core-2.1.17/dataflow/models/server_config.py +0 -37
- dataflow_core-2.1.17/dataflow/models/session.py +0 -17
- dataflow_core-2.1.17/dataflow/schemas/connection.py +0 -84
- dataflow_core-2.1.17/dataflow/schemas/git_ssh.py +0 -47
- dataflow_core-2.1.17/dataflow/schemas/secret.py +0 -44
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/README.md +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/authenticator/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/dataflow.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/schemas/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/scripts/clone_environment.sh +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/scripts/create_environment.sh +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/scripts/update_environment.sh +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/gcp_manager.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/exceptions.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/logger.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/dependency_links.txt +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/entry_points.txt +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/requires.txt +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/top_level.txt +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/versions/001_initial_baseline_migration.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/versions/__init__.py +0 -0
- {dataflow_core-2.1.17 → dataflow_core-2.1.20}/setup.cfg +0 -0
|
@@ -18,9 +18,14 @@ dataflow = Dataflow()
|
|
|
18
18
|
class DataflowAuthDBView(AuthDBView):
|
|
19
19
|
@expose('/login/', methods=['GET', 'POST'])
|
|
20
20
|
def login(self):
|
|
21
|
+
|
|
22
|
+
"""This method checks for a 'dataflow_session' cookie, retrieves user details from Dataflow,
|
|
23
|
+
and logs in or creates the user in Airflow accordingly.
|
|
24
|
+
If the cookie is not present, it falls back to the standard login process.
|
|
25
|
+
|
|
26
|
+
Overrides the default login method to integrate with Dataflow authentication.
|
|
21
27
|
"""
|
|
22
|
-
|
|
23
|
-
"""
|
|
28
|
+
|
|
24
29
|
try:
|
|
25
30
|
session_id = request.cookies.get('dataflow_session')
|
|
26
31
|
if not session_id:
|
|
@@ -52,6 +57,9 @@ class DataflowAuthDBView(AuthDBView):
|
|
|
52
57
|
return super().login()
|
|
53
58
|
|
|
54
59
|
class DataflowAirflowAuthenticator(FabAirflowSecurityManagerOverride):
|
|
60
|
+
|
|
61
|
+
"""Custom Security Manager to integrate Airflow authentication with Dataflow."""
|
|
62
|
+
|
|
55
63
|
authdbview = DataflowAuthDBView
|
|
56
64
|
|
|
57
65
|
def __init__(self, appbuilder):
|
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
import os, uuid, re, hashlib, secrets
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
from zoneinfo import ZoneInfo
|
|
4
|
+
from traitlets import Bool, Unicode
|
|
5
|
+
from jupyterhub.auth import Authenticator
|
|
6
|
+
from oauthenticator.google import GoogleOAuthenticator
|
|
7
|
+
from oauthenticator.azuread import AzureAdOAuthenticator
|
|
8
|
+
from dataflow.db import get_db
|
|
9
|
+
from dataflow.models import user as m_user, session as m_session, otp as m_otp
|
|
10
|
+
from sqlalchemy import or_
|
|
11
|
+
from dataflow.utils.blocked_domains import blocked_domains
|
|
12
|
+
class DataflowBaseAuthenticator(Authenticator):
|
|
13
|
+
|
|
14
|
+
"""Base Authenticator to handle Dataflow authentication and session management.
|
|
15
|
+
Provides methods to authenticate users via Dataflow credentials, manage sessions.
|
|
16
|
+
|
|
17
|
+
Overrides JupyterHub's Authenticator class.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
enable_dataflow_auth = Bool(True, config=True, help="Enable username/password authentication")
|
|
21
|
+
|
|
22
|
+
def __init__(self, **kwargs):
    """Open a Dataflow database session and ensure the auth tables exist.

    The first value yielded by ``get_db()`` is stored on ``self.db``. The
    metadata of the user and session model modules is then created on that
    session's bind (existing tables are left alone via ``checkfirst``).

    Raises:
        Exception: Any error raised during setup is logged and re-raised.
    """
    super().__init__(**kwargs)
    try:
        self.db = next(get_db())
        # Same create_all call for each model module, user first.
        for model_module in (m_user, m_session):
            model_module.Base.metadata.create_all(
                bind=self.db.get_bind(),
                checkfirst=True,
            )
        self.log.info("Dataflow database initialized successfully")
    except Exception as e:
        self.log.error(f"Failed to initialize Dataflow database: {str(e)}")
        raise
|
|
32
|
+
|
|
33
|
+
def generate_session_id(self):
    """Return a fresh random session identifier.

    Returns:
        str: The canonical 36-character text form of a new UUID4.
    """
    new_uuid = uuid.uuid4()
    return str(new_uuid)
|
|
38
|
+
|
|
39
|
+
def set_session_cookie(self, handler, session_id):
    """Set the ``dataflow_session`` cookie on the response.

    The cookie is scoped to the last two labels of the request host
    (``hub.example.com`` -> ``.example.com``) and expires 60 days from now.

    Args:
        handler: The request handler to set the cookie on.
        session_id: The session ID to store in the cookie.
    """
    expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=60)
    host = handler.request.host
    # The Host header may carry a port ("hub.example.com:8000"). A port is not
    # valid in a cookie Domain attribute, so strip it before deriving the domain.
    hostname = re.sub(r":\d+$", "", host)
    # NOTE(review): the two-label heuristic is wrong for multi-part public
    # suffixes (e.g. "co.uk") and for bare IP addresses - confirm deployments.
    domain = ".".join(hostname.split(".")[-2:])
    handler.set_cookie(
        "dataflow_session",
        session_id,
        domain=f".{domain}",
        path="/",
        expires=expires,
        secure=True,
        httponly=True,
        samesite="None",
    )
    # The session id is a bearer credential, so it is deliberately not logged.
    self.log.info(f"Set session cookie: dataflow_session=<redacted> for host={host}")
|
|
61
|
+
|
|
62
|
+
def get_or_create_session(self, user_id):
    """Create and persist a new session row for a user.

    Despite the name, no existing session is looked up or reused: every call
    generates a new session ID (regenerating on the unlikely event that the
    ID is already present in the sessions table) and inserts a new row.

    Args:
        user_id: The ID of the user the session belongs to.

    Returns:
        session_id (str): The newly created session ID.
    """
    session_id = self.generate_session_id()
    # Regenerate until the ID does not collide with a stored session.
    while self.db.query(m_session.Session).filter(
        m_session.Session.session_id == session_id
    ).first():
        session_id = self.generate_session_id()

    db_item = m_session.Session(user_id=user_id, session_id=session_id)
    self.db.add(db_item)
    self.db.commit()
    self.db.refresh(db_item)
    # The session id is a bearer credential; log who it is for, not its value.
    self.log.info(f"Created new session for user_id={user_id}")
    return session_id
|
|
85
|
+
|
|
86
|
+
def check_blocked_users(self, username, authenticated):
    """Decide whether an authenticated user may proceed.

    Results that carry a ``session_id`` key (as produced by this module's
    authenticate methods) are allowed directly; anything else is delegated
    to the parent class implementation.

    Args:
        username (str): The username of the authenticated user.
        authenticated (dict|None): The data returned from the authenticate method.

    Returns:
        username (str|None): The username if allowed, else None (or whatever
        the parent implementation returns when delegated).
    """
    # Log only whether auth data is present: the dict itself contains the
    # session id and provider auth_state, which must not end up in logs.
    self.log.info(
        f"Checking blocked users for {username}: "
        f"authenticated={bool(authenticated)}, allowed_users={self.allowed_users}"
    )

    if not authenticated:
        self.log.warning(f"No authenticated data for user: {username}")
        return None

    if isinstance(authenticated, dict) and "session_id" in authenticated:
        self.log.info(f"Allowing Dataflow authentication for user: {username}")
        return username

    return super().check_blocked_users(username, authenticated)
|
|
108
|
+
|
|
109
|
+
def extract_username_from_email(self, email):
    """Return the part of an email address before its first ``@``.

    Args:
        email (str): User's email address

    Returns:
        username (str): The local part; the input unchanged when it has no ``@``
    """
    # partition() yields the whole string as the head when '@' is absent.
    local_part, _, _ = email.partition('@')
    return local_part
|
|
123
|
+
|
|
124
|
+
def generate_secure_password(self):
    """Produce a random, unguessable placeholder password value.

    A random UUID4 string is concatenated with a 16-byte hex salt and the
    SHA-256 hex digest of the result is returned.

    Returns:
        password_hash (str): 64-character hexadecimal digest
    """
    salt = secrets.token_hex(16)
    seed = f"{uuid.uuid4()}{salt}"
    return hashlib.sha256(seed.encode()).hexdigest()
|
|
136
|
+
|
|
137
|
+
def create_new_user(self, email, first_name=None, last_name=None):
    """Create and persist a new user derived from an email address.

    The email is lowercased, its domain is checked against
    ``blocked_domains``, and a username is built from the local part with
    every character outside ``[a-z0-9]`` removed. If that username is taken,
    a numeric suffix (1, 2, ...) is appended until a free one is found.

    Args:
        email (str): User's email address
        first_name (str): User's first name; defaults to the username
        last_name (str): User's last name; defaults to an empty string

    Returns:
        new_user (m_user.User|None): Created user object or None if creation failed

    Raises:
        ValueError: With message ``blocked_domain:<domain>`` when the email
            domain is blocked. Callers match on that prefix.
    """
    try:
        # Normalize email to lowercase for consistency
        email = email.lower()

        # Use the text after the LAST '@' as the domain, so an address with
        # several '@' signs cannot hide its real domain from the block list.
        _, at_sign, domain = email.rpartition('@')
        if at_sign and domain in blocked_domains:
            self.log.warning(f"Blocked domain detected: {domain} for email: {email}")
            raise ValueError(f"blocked_domain:{domain}")

        username = re.sub(r'[^a-z0-9]', '', self.extract_username_from_email(email))
        if not username:
            self.log.error("Cannot create user: Username is empty")
            return None

        # Append 1, 2, 3, ... to the base name until no existing row matches.
        base_username = username
        suffix = 0
        while (
            self.db.query(m_user.User)
            .filter(m_user.User.user_name == username)
            .first()
        ):
            suffix += 1
            username = f"{base_username}{suffix}"

        new_user = m_user.User(
            user_name=username,
            first_name=first_name or username,
            last_name=last_name or "",
            email=email,
            password=self.generate_secure_password(),
        )

        self.db.add(new_user)
        self.db.commit()
        self.db.refresh(new_user)

        self.log.info(f"Created new user: {username} with email: {email}")
        return new_user

    except ValueError as e:
        if str(e).startswith("blocked_domain:"):
            # Propagate unchanged (original traceback) for the caller to handle.
            raise
        self.log.error(f"ValueError creating new user: {str(e)}")
        self.db.rollback()
        return None
    except Exception as e:
        self.log.error(f"Error creating new user: {str(e)}")
        self.db.rollback()
        return None
|
|
211
|
+
|
|
212
|
+
async def authenticate_dataflow(self, handler, data):
    """Authenticate a user from the login form using an email + one-time password.

    The submitted ``password`` is treated as a numeric OTP and matched
    against an unexpired ``UserOtp`` row for the (lowercased) email. A
    matching OTP is deleted, the user row is created if it does not exist
    yet, and a new session is created and set as a cookie.

    Every failure after the initial guard renders an error page through
    ``self.login_error`` before returning None.

    Args:
        handler: The request handler.
        data: The authentication data containing username and password.

    Returns:
        dict|None: ``{"name", "session_id", "auth_state"}`` if successful, else None.
    """
    if not (
        self.enable_dataflow_auth
        and isinstance(data, dict)
        and data.get("username")
        and data.get("password")
    ):
        return None

    user_email = data["username"].lower()  # Normalize email for comparison
    password = data["password"]
    self.log.info(f"Attempting Dataflow authentication for user: {user_email}")

    # A non-numeric OTP is a user mistake, not an internal failure: report it
    # as an invalid OTP instead of a stack-traced generic error.
    try:
        otp_value = int(password)
    except (TypeError, ValueError):
        self.log.warning(f"Non-numeric OTP submitted for user: {user_email}")
        self.login_error(handler, f"Invalid OTP provided for {user_email}. Please try again.")
        return None

    # SQL functions come from sqlalchemy.func; a Session object has no `func`
    # attribute. Imported locally so this block needs no new file-level import.
    from sqlalchemy import func

    try:
        current_time = datetime.now(ZoneInfo("UTC"))
        valid_user = (
            self.db.query(m_otp.UserOtp)
            .filter(
                func.lower(m_otp.UserOtp.email) == user_email,
                m_otp.UserOtp.otp == otp_value,
                m_otp.UserOtp.expires_at > current_time,
            )
            .first()
        )

        user = (
            self.db.query(m_user.User)
            .filter(func.lower(m_user.User.email) == user_email)
            .first()
        )

        if not valid_user:
            if user:
                self.log.warning(f"Invalid OTP for user: {user_email}")
                self.login_error(handler, f"Invalid OTP provided for {user_email}. Please try again.")
            else:
                self.log.warning(f"User not found: {user_email}")
                self.login_error(handler, f"User not found: {user_email}. Please sign up.")
            return None

        # Delete the OTP row once it has matched so it cannot be reused.
        self.db.delete(valid_user)
        self.db.commit()
        self.log.info(f"OTP validated and deleted for user: {user_email}")

        if not user:
            try:
                db_user = self.create_new_user(user_email)
                if not db_user:
                    self.log.error(f"Failed to create new user for email: {user_email}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None
                user = db_user
            except ValueError as e:
                if str(e).startswith("blocked_domain:"):
                    domain = str(e).split(":")[1]
                    self.log.warning(f"Blocked domain signup attempt: {domain}")
                    self.login_error(
                        handler,
                        f"Users with non-organizational email accounts (like {domain}) can only be created by administrators. Please contact your system administrator for access.",
                        "Registration Restricted"
                    )
                    return None
                else:
                    self.log.error(f"ValueError during user creation: {str(e)}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None

        session_id = self.get_or_create_session(user.user_id)
        self.set_session_cookie(handler, session_id)
        self.log.info(f"Dataflow authentication successful for user: {user.user_name}")
        return {"name": user.user_name, "session_id": session_id, "auth_state": {}}

    except Exception as e:
        self.log.error(f"Dataflow authentication error: {str(e)}", exc_info=True)
        self.db.rollback()
        self.login_error(handler, "Authentication error occurred. Please try again.")
        return None
|
|
294
|
+
|
|
295
|
+
class DataflowGoogleAuthenticator(DataflowBaseAuthenticator, GoogleOAuthenticator):
|
|
296
|
+
|
|
297
|
+
"""Authenticator to handle Google OAuth authentication with Dataflow integration.
|
|
298
|
+
|
|
299
|
+
Overrides
|
|
300
|
+
- DataflowBaseAuthenticator
|
|
301
|
+
- GoogleOAuthenticator
|
|
302
|
+
|
|
303
|
+
Requires Google OAuth credentials.
|
|
304
|
+
- google_client_id
|
|
305
|
+
- google_client_secret
|
|
306
|
+
"""
|
|
307
|
+
|
|
308
|
+
dataflow_oauth_type = Unicode(
|
|
309
|
+
default_value="google",
|
|
310
|
+
config=True,
|
|
311
|
+
help="The OAuth provider type for DataflowHub (e.g., github, google)"
|
|
312
|
+
)
|
|
313
|
+
google_client_id = Unicode(config=True, help="Google OAuth client ID")
|
|
314
|
+
google_client_secret = Unicode(config=True, help="Google OAuth client secret")
|
|
315
|
+
|
|
316
|
+
def __init__(self, **kwargs):
    """Initialise the authenticator and copy the Google credentials.

    After the parent initialisers run, ``google_client_id`` and
    ``google_client_secret`` are copied onto ``client_id`` and
    ``client_secret``, and the resulting configuration is logged.
    """
    super().__init__(**kwargs)
    for target, source in (
        ("client_id", "google_client_id"),
        ("client_secret", "google_client_secret"),
        ("dataflow_oauth_type", "dataflow_oauth_type"),
    ):
        setattr(self, target, getattr(self, source))

    startup_message = (
        f"DataflowGoogleAuthenticator initialized with google_client_id={self.google_client_id}, "
        f"oauth_callback_url={self.oauth_callback_url}, "
        f"enable_dataflow_auth={self.enable_dataflow_auth}"
    )
    self.log.info(startup_message)
|
|
324
|
+
|
|
325
|
+
def login_error(self, handler, message, title="Authentication Failed"):
    """Render a simple centred HTML error page and finish the request with 403.

    ``message`` and ``title`` are HTML-escaped before being placed in the
    page: callers pass text that includes user-supplied values (the typed
    email address, exception text), which must not be interpreted as markup.

    Args:
        handler: The request handler whose response is written and finished.
        message: Human-readable error description shown in the page body.
        title: Page title and heading.
    """
    from html import escape

    safe_title = escape(str(title))
    safe_message = escape(str(message))
    page = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_title}</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,600' rel='stylesheet'>
</head>
<body style="margin: 0; padding: 20px; font-family: 'Lato', Arial, sans-serif; background-color: #f8fafc; min-height: 100vh; display: flex; align-items: center; justify-content: center;">
<div style="background: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); max-width: 450px; width: 100%; padding: 40px; text-align: center;">
<div style="width: 60px; height: 60px; background-color: #ffebee; border-radius: 50%; display: flex; align-items: center; justify-content: center; margin: 0 auto 20px; font-size: 30px; color: #d32f2f;">!</div>

<h1 style="font-size: 24px; font-weight: 600; color: #121926; margin: 0 0 12px 0;">{safe_title}</h1>

<p style="font-size: 15px; color: #697586; line-height: 1.5; margin: 0 0 24px 0;">{safe_message}</p>

<a href="/hub/login" style="display: inline-block; padding: 12px 32px; background-color: #30baba; color: white; text-decoration: none; border-radius: 6px; font-size: 14px; font-weight: 600; transition: background-color 0.3s;">Try Again</a>
</div>
</body>
</html>
"""
    handler.set_status(403)
    handler.finish(page)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
async def authenticate(self, handler, data):
    """Authenticate via the Dataflow OTP form or, failing that, Google OAuth.

    A form login (username + password present and Dataflow auth enabled) is
    handled entirely by ``authenticate_dataflow``; its result is returned
    as-is, including None on failure, because that path has already written
    the error response. Otherwise the Google OAuth flow runs, the matching
    Dataflow user is looked up by email (case-insensitively) or created, and
    a new session cookie is set.

    Args:
        handler: The request handler.
        data: The authentication data.

    Returns:
        dict|None: ``{"name", "session_id", "auth_state"}`` if successful, else None.
    """
    # Do not log `data` (it carries the submitted OTP) or the full request
    # URI (the OAuth callback query string carries the authorization code).
    self.log.info(f"Authenticate called, request_path: {handler.request.path}")

    is_form_login = bool(
        self.enable_dataflow_auth
        and isinstance(data, dict)
        and data.get("username")
        and data.get("password")
    )
    result = await self.authenticate_dataflow(handler, data)
    if result or is_form_login:
        # A failed form login has already finished the response with an error
        # page; falling through to OAuth would attempt to respond a second time.
        return result

    try:
        # Local import: lower() on both sides makes the email lookup agree
        # with create_new_user, which stores emails lowercased.
        from sqlalchemy import func

        user = await super().authenticate(handler, data)
        if not user:
            self.log.warning("Google OAuth authentication failed: No user data returned")
            return None
        # Log only the identity; the full result includes auth_state tokens.
        self.log.info(f"Google OAuth authentication returned user: {user['name']}")

        email = user["name"]

        db_user = (
            self.db.query(m_user.User)
            .filter(func.lower(m_user.User.email) == email.lower())
            .first()
        )

        if not db_user:
            self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
            # Extract additional info from user data if available
            auth_state = user.get("auth_state", {})
            user_info = auth_state.get("user", {}) if auth_state else {}

            # Prefer given_name / family_name; otherwise split the full name
            # on its first space.
            full_name = user_info.get("name", "")
            first_name = user_info.get("given_name", "")
            last_name = user_info.get("family_name", "")

            if not first_name and full_name:
                name_parts = full_name.strip().split(' ', 1)
                first_name = name_parts[0] if name_parts else ""
                last_name = name_parts[1] if len(name_parts) > 1 else ""

            self.log.info(f"Creating user with first_name='{first_name}', last_name='{last_name}'")

            try:
                db_user = self.create_new_user(email, first_name, last_name)
                if not db_user:
                    self.log.error(f"Failed to create new user for email: {email}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None
            except ValueError as e:
                if str(e).startswith("blocked_domain:"):
                    domain = str(e).split(":")[1]
                    self.log.warning(f"Blocked domain signup attempt: {domain}")
                    self.login_error(
                        handler,
                        f"Users with non-organizational email accounts (like {domain}) can only be created by administrators. Please contact your system administrator for access.",
                        "Registration Restricted"
                    )
                    return None
                else:
                    self.log.error(f"ValueError during user creation: {str(e)}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None

        username = db_user.user_name
        session_id = self.get_or_create_session(db_user.user_id)
        self.set_session_cookie(handler, session_id)
        # The session id is a credential and is not logged.
        self.log.info(f"Google OAuth completed for user: {username}")
        return {
            "name": username,
            "session_id": session_id,
            "auth_state": user.get("auth_state", {})
        }
    except Exception as e:
        # Log first so the details survive even if rendering the page fails,
        # and show the user a generic message rather than internal error text.
        self.log.error(f"Google OAuth authentication error: {str(e)}", exc_info=True)
        self.login_error(handler, "Authentication error occurred. Please try again.")
        return None
    finally:
        self.db.close()
|
|
445
|
+
|
|
446
|
+
class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticator):
|
|
447
|
+
|
|
448
|
+
"""Authenticator to handle Azure AD OAuth authentication with Dataflow integration.
|
|
449
|
+
|
|
450
|
+
Overrides
|
|
451
|
+
- DataflowBaseAuthenticator
|
|
452
|
+
- AzureAdOAuthenticator
|
|
453
|
+
|
|
454
|
+
Requires Azure AD OAuth credentials.
|
|
455
|
+
- azure_client_id
|
|
456
|
+
- azure_client_secret
|
|
457
|
+
- azure_tenant_id
|
|
458
|
+
"""
|
|
459
|
+
|
|
460
|
+
azure_client_id = Unicode(config=True, help="Azure AD OAuth client ID")
|
|
461
|
+
azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
|
|
462
|
+
azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
|
|
463
|
+
azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
|
|
464
|
+
dataflow_oauth_type = Unicode(
|
|
465
|
+
default_value="google",
|
|
466
|
+
config=True,
|
|
467
|
+
help="The OAuth provider type for DataflowHub (e.g., github, google)"
|
|
468
|
+
)
|
|
469
|
+
def __init__(self, **kwargs):
    """Initialise the authenticator and copy the Azure AD settings.

    After the parent initialisers run, the ``azure_*`` credential traits are
    copied onto ``client_id``, ``client_secret`` and ``tenant_id``,
    ``azure_scope`` is split on whitespace into ``scope``, and the resulting
    configuration is logged.
    """
    super().__init__(**kwargs)
    for target, source in (
        ("client_id", "azure_client_id"),
        ("client_secret", "azure_client_secret"),
        ("tenant_id", "azure_tenant_id"),
    ):
        setattr(self, target, getattr(self, source))
    self.scope = self.azure_scope.split()
    self.dataflow_oauth_type = self.dataflow_oauth_type

    startup_message = (
        f"DataflowAzureAuthenticator initialized with azure_client_id={self.azure_client_id}, "
        f"oauth_callback_url={self.oauth_callback_url}, "
        f"enable_dataflow_auth={self.enable_dataflow_auth}"
    )
    self.log.info(startup_message)
|
|
479
|
+
|
|
480
|
+
def login_error(self, handler, message, title="Authentication Failed"):
    """Render a simple centred HTML error page and finish the request with 403.

    ``message`` and ``title`` are HTML-escaped before being placed in the
    page: callers pass text that includes user-supplied values (the typed
    email address), which must not be interpreted as markup.

    Args:
        handler: The request handler whose response is written and finished.
        message: Human-readable error description shown in the page body.
        title: Page title and heading.
    """
    from html import escape

    safe_title = escape(str(title))
    safe_message = escape(str(message))
    page = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_title}</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,600' rel='stylesheet'>
</head>
<body style="margin: 0; padding: 20px; font-family: 'Lato', Arial, sans-serif; background-color: #f8fafc; min-height: 100vh; display: flex; align-items: center; justify-content: center;">
<div style="background: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); max-width: 450px; width: 100%; padding: 40px; text-align: center;">
<div style="width: 60px; height: 60px; background-color: #ffebee; border-radius: 50%; display: flex; align-items: center; justify-content: center; margin: 0 auto 20px; font-size: 30px; color: #d32f2f;">!</div>

<h1 style="font-size: 24px; font-weight: 600; color: #121926; margin: 0 0 12px 0;">{safe_title}</h1>

<p style="font-size: 15px; color: #697586; line-height: 1.5; margin: 0 0 24px 0;">{safe_message}</p>

<a href="/hub/login" style="display: inline-block; padding: 12px 32px; background-color: #30baba; color: white; text-decoration: none; border-radius: 6px; font-size: 14px; font-weight: 600; transition: background-color 0.3s;">Try Again</a>
</div>
</body>
</html>
"""
    handler.set_status(403)
    handler.finish(page)
|
|
506
|
+
|
|
507
|
+
async def authenticate(self, handler, data):
    """Authenticate user using Azure AD OAuth with Dataflow integration.

    ``authenticate_dataflow`` is tried first and its result is returned when
    truthy. Otherwise the parent class's OAuth flow runs, the Dataflow user
    is looked up by the ``upn`` value from ``auth_state["user"]``, and a new
    user is created when no row matches.

    Args:
        handler: The request handler.
        data: The authentication data.

    Returns:
        dict|None: The ``authenticate_dataflow`` result when truthy;
        otherwise a dict with ``name``, ``session_id`` and ``auth_state``
        if successful, else None.
    """
    # NOTE(review): this early return sits outside the try/finally below, so
    # self.db.close() is not reached on this path — confirm that
    # authenticate_dataflow manages the session itself.
    result = await self.authenticate_dataflow(handler, data)
    if result:
        return result

    try:
        user = await super().authenticate(handler, data)
        if not user:
            self.log.warning("Azure AD OAuth authentication failed: No user data returned")
            return None

        # Log only the identity: the full dict includes auth_state, which
        # may carry OAuth tokens and must not end up in log files.
        self.log.info(f"Azure AD OAuth authentication returned user: {user.get('name')}")

        auth_state = user.get("auth_state", {})
        user_info = auth_state.get("user", {}) if auth_state else {}
        email = user_info.get("upn")
        if not email:
            self.log.warning("Azure AD OAuth authentication failed: No upn in user data")
            return None

        db_user = (
            self.db.query(m_user.User)
            .filter(m_user.User.email == email)
            .first()
        )

        if not db_user:
            self.log.info(f"User with email {email} not found in Dataflow database, creating new user")

            # Extract name information from the Azure AD response. ``or ""``
            # also covers keys that are present but null.
            display_name = user_info.get("displayName", "") or user_info.get("name", "") or user.get("name", "")
            first_name = user_info.get("givenName") or ""
            last_name = user_info.get("surname") or ""

            if not first_name and display_name:
                # Fallback: split the display name at its first space.
                first_name, _, last_name = display_name.strip().partition(" ")

            # Names and email only; the raw profile dict is not logged.
            self.log.info(f"Creating user with first_name='{first_name}', last_name='{last_name}' for email {email}")

            try:
                db_user = self.create_new_user(email, first_name, last_name)
                if not db_user:
                    self.log.error(f"Failed to create new user for email: {email}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None
            except ValueError as e:
                error_text = str(e)
                if error_text.startswith("blocked_domain:"):
                    # Keep everything after the first ':' so a payload that
                    # itself contains ':' is not truncated.
                    domain = error_text.partition(":")[2]
                    self.log.warning(f"Blocked domain signup attempt: {domain}")
                    self.login_error(
                        handler,
                        f"Users with non-organizational email accounts (like {domain}) can only be created by administrators. Please contact your system administrator for access.",
                        "Registration Restricted"
                    )
                    return None
                self.log.error(f"ValueError during user creation: {error_text}")
                self.login_error(handler, "Authentication error occurred. Please try again.")
                return None

        username = db_user.user_name
        session_id = self.get_or_create_session(db_user.user_id)
        self.set_session_cookie(handler, session_id)
        self.log.info(f"Azure AD OAuth completed for user: {username}, session_id={session_id}")
        return {
            "name": username,
            "session_id": session_id,
            "auth_state": auth_state,
        }

    except Exception as e:
        # Best-effort boundary: any failure is logged with its traceback and
        # reported to the caller as a failed login (None).
        self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
        return None
    finally:
        self.db.close()
|
|
599
|
+
|
|
600
|
+
# The OAuth backend is chosen once, at import time, from DATAFLOW_OAUTH_TYPE.
# "google" (also the default when the variable is unset) selects the Google
# authenticator; every other value selects the Azure authenticator.
auth_type = os.environ.get("DATAFLOW_OAUTH_TYPE", "google")

BaseAuthenticator = (
    DataflowGoogleAuthenticator
    if auth_type == "google"
    else DataflowAzureAuthenticator
)


class DataflowHubAuthenticator(BaseAuthenticator):
    """Authenticator class whose base is the backend selected by DATAFLOW_OAUTH_TYPE."""
|
{dataflow_core-2.1.17 → dataflow_core-2.1.20}/authenticator/dataflowsupersetauthenticator.py
RENAMED
|
@@ -12,6 +12,7 @@ from superset.security import SupersetSecurityManager
|
|
|
12
12
|
from dataflow.dataflow import Dataflow
|
|
13
13
|
|
|
14
14
|
class DataflowAuthDBView(AuthDBView):
|
|
15
|
+
|
|
15
16
|
def __init__(self):
|
|
16
17
|
self.dataflow = Dataflow()
|
|
17
18
|
|
|
@@ -33,14 +34,15 @@ class DataflowAuthDBView(AuthDBView):
|
|
|
33
34
|
@expose('/login/', methods=['GET', "POST"])
|
|
34
35
|
def login(self):
|
|
35
36
|
"""
|
|
36
|
-
|
|
37
|
+
This method handles authentication for superset in Dataflow.
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
-
|
|
39
|
+
Methods:
|
|
40
|
+
- GET:
|
|
41
|
+
Used for browser-based login. Authenticates using session cookie and redirects to home.
|
|
40
42
|
|
|
41
|
-
|
|
42
|
-
-
|
|
43
|
-
|
|
43
|
+
- POST:
|
|
44
|
+
Used for API-based login. Returns JWT access token for programmatic access.
|
|
45
|
+
Returns JSON response with access token
|
|
44
46
|
"""
|
|
45
47
|
if request.method == "GET":
|
|
46
48
|
session_id = request.cookies.get('dataflow_session')
|
|
@@ -71,6 +73,9 @@ class DataflowAuthDBView(AuthDBView):
|
|
|
71
73
|
return jsonify(resp)
|
|
72
74
|
|
|
73
75
|
class DataflowSecurityManager(SupersetSecurityManager):
|
|
76
|
+
|
|
77
|
+
"""Custom Security Manager integrating Dataflow authentication with superset."""
|
|
78
|
+
|
|
74
79
|
authdbview = DataflowAuthDBView
|
|
75
80
|
def __init__(self, appbuilder):
|
|
76
81
|
super(DataflowSecurityManager, self).__init__(appbuilder)
|