dataflow-core 2.1.17__tar.gz → 2.1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/PKG-INFO +1 -1
  2. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/authenticator/dataflowairflowauthenticator.py +10 -2
  3. dataflow_core-2.1.20/authenticator/dataflowhubauthenticator.py +608 -0
  4. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/authenticator/dataflowsupersetauthenticator.py +11 -6
  5. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/configuration.py +7 -0
  6. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/database_manager.py +23 -0
  7. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/db.py +4 -3
  8. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/environment.py +20 -18
  9. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/__init__.py +2 -0
  10. dataflow_core-2.1.20/dataflow/models/app_types.py +31 -0
  11. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/blacklist_library.py +3 -3
  12. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/connection.py +19 -2
  13. dataflow_core-2.1.20/dataflow/models/dataflow_setting.py +14 -0
  14. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/dataflow_zone.py +15 -0
  15. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/environment.py +125 -4
  16. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/environment_status.py +6 -0
  17. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/git_ssh.py +16 -0
  18. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/org_associations.py +35 -2
  19. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/organization.py +62 -13
  20. dataflow_core-2.1.20/dataflow/models/otp.py +19 -0
  21. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/pinned_projects.py +13 -0
  22. dataflow_core-2.1.20/dataflow/models/pod_activity.py +30 -0
  23. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/pod_session_history.py +14 -1
  24. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/project_details.py +26 -0
  25. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/recent_project_studio.py +16 -0
  26. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/recent_projects.py +7 -0
  27. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/role.py +20 -0
  28. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/role_server.py +8 -1
  29. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/role_zone.py +17 -2
  30. dataflow_core-2.1.20/dataflow/models/server_config.py +77 -0
  31. dataflow_core-2.1.20/dataflow/models/session.py +22 -0
  32. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/team.py +11 -2
  33. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/user.py +41 -7
  34. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/user_team.py +11 -0
  35. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/models/variables.py +26 -2
  36. dataflow_core-2.1.20/dataflow/schemas/connection.py +129 -0
  37. dataflow_core-2.1.20/dataflow/schemas/git_ssh.py +84 -0
  38. dataflow_core-2.1.20/dataflow/schemas/secret.py +75 -0
  39. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/factory.py +6 -2
  40. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/interface.py +3 -0
  41. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/aws_manager.py +55 -0
  42. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/azure_manager.py +55 -0
  43. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/service.py +8 -2
  44. dataflow_core-2.1.20/dataflow/secrets_manager/utils.py +58 -0
  45. dataflow_core-2.1.20/dataflow/utils/blocked_domains.py +4781 -0
  46. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/get_current_user.py +17 -3
  47. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/PKG-INFO +1 -1
  48. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/SOURCES.txt +5 -0
  49. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/env.py +10 -0
  50. dataflow_core-2.1.20/dfmigration/versions/002_user_onboarding_migration.py +65 -0
  51. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/setup.py +1 -1
  52. dataflow_core-2.1.17/authenticator/dataflowhubauthenticator.py +0 -309
  53. dataflow_core-2.1.17/dataflow/models/app_types.py +0 -15
  54. dataflow_core-2.1.17/dataflow/models/pod_activity.py +0 -16
  55. dataflow_core-2.1.17/dataflow/models/server_config.py +0 -37
  56. dataflow_core-2.1.17/dataflow/models/session.py +0 -17
  57. dataflow_core-2.1.17/dataflow/schemas/connection.py +0 -84
  58. dataflow_core-2.1.17/dataflow/schemas/git_ssh.py +0 -47
  59. dataflow_core-2.1.17/dataflow/schemas/secret.py +0 -44
  60. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/README.md +0 -0
  61. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/authenticator/__init__.py +0 -0
  62. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/__init__.py +0 -0
  63. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/dataflow.py +0 -0
  64. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/schemas/__init__.py +0 -0
  65. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/scripts/clone_environment.sh +0 -0
  66. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/scripts/create_environment.sh +0 -0
  67. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/scripts/update_environment.sh +0 -0
  68. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/__init__.py +0 -0
  69. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/__init__.py +0 -0
  70. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/secrets_manager/providers/gcp_manager.py +0 -0
  71. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/__init__.py +0 -0
  72. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/exceptions.py +0 -0
  73. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow/utils/logger.py +0 -0
  74. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/dependency_links.txt +0 -0
  75. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/entry_points.txt +0 -0
  76. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/requires.txt +0 -0
  77. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dataflow_core.egg-info/top_level.txt +0 -0
  78. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/__init__.py +0 -0
  79. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/versions/001_initial_baseline_migration.py +0 -0
  80. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/dfmigration/versions/__init__.py +0 -0
  81. {dataflow_core-2.1.17 → dataflow_core-2.1.20}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataflow-core
3
- Version: 2.1.17
3
+ Version: 2.1.20
4
4
  Summary: Dataflow core package
5
5
  Author: Dataflow
6
6
  Author-email:
@@ -18,9 +18,14 @@ dataflow = Dataflow()
18
18
  class DataflowAuthDBView(AuthDBView):
19
19
  @expose('/login/', methods=['GET', 'POST'])
20
20
  def login(self):
21
+
22
+ """This method checks for a 'dataflow_session' cookie, retrieves user details from Dataflow,
23
+ and logs in or creates the user in Airflow accordingly.
24
+ If the cookie is not present, it falls back to the standard login process.
25
+
26
+ Overrides the default login method to integrate with Dataflow authentication.
21
27
  """
22
- Override the default login method to handle custom authentication
23
- """
28
+
24
29
  try:
25
30
  session_id = request.cookies.get('dataflow_session')
26
31
  if not session_id:
@@ -52,6 +57,9 @@ class DataflowAuthDBView(AuthDBView):
52
57
  return super().login()
53
58
 
54
59
  class DataflowAirflowAuthenticator(FabAirflowSecurityManagerOverride):
60
+
61
+ """Custom Security Manager to integrate Airflow authentication with Dataflow."""
62
+
55
63
  authdbview = DataflowAuthDBView
56
64
 
57
65
  def __init__(self, appbuilder):
@@ -0,0 +1,608 @@
1
+ import os, uuid, re, hashlib, secrets
2
+ from datetime import datetime, timedelta
3
+ from zoneinfo import ZoneInfo
4
+ from traitlets import Bool, Unicode
5
+ from jupyterhub.auth import Authenticator
6
+ from oauthenticator.google import GoogleOAuthenticator
7
+ from oauthenticator.azuread import AzureAdOAuthenticator
8
+ from dataflow.db import get_db
9
+ from dataflow.models import user as m_user, session as m_session, otp as m_otp
10
+ from sqlalchemy import or_
11
+ from dataflow.utils.blocked_domains import blocked_domains
12
+ class DataflowBaseAuthenticator(Authenticator):
13
+
14
+ """Base Authenticator to handle Dataflow authentication and session management.
15
+ Provides methods to authenticate users via Dataflow credentials, manage sessions.
16
+
17
+ Overrides JupyterHub's Authenticator class.
18
+ """
19
+
20
+ enable_dataflow_auth = Bool(True, config=True, help="Enable username/password authentication")
21
+
22
def __init__(self, **kwargs):
    """Open the Dataflow database session and make sure the tables exist.

    Keyword arguments are forwarded unchanged to the parent initializer.
    Any failure while preparing the database is logged and re-raised.
    """
    super().__init__(**kwargs)
    try:
        self.db = next(get_db())
        # Create whatever tables are registered on each model module's
        # metadata; checkfirst leaves existing tables untouched.
        for model_module in (m_user, m_session):
            model_module.Base.metadata.create_all(
                bind=self.db.get_bind(), checkfirst=True
            )
        self.log.info("Dataflow database initialized successfully")
    except Exception as e:
        self.log.error(f"Failed to initialize Dataflow database: {e!s}")
        raise
32
+
33
def generate_session_id(self):
    """Return a freshly generated random UUID4 rendered as a string."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
38
+
39
def set_session_cookie(self, handler, session_id):
    """Set the dataflow_session cookie in the user's browser.

    The cookie is scoped to the last two labels of the request host
    (``hub.example.com`` -> ``.example.com``). A port in the Host header is
    ignored, and when the host is an IP literal no Domain attribute is sent,
    so the cookie stays bound to that exact host. The cookie expires 60 days
    from now and is marked Secure, HttpOnly and SameSite=None.

    Args:
        handler: The request handler to set the cookie on.
        session_id: The session ID to set in the cookie.
    """
    import ipaddress

    expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=60)
    host = handler.request.host

    # The Host header may carry a port ("hub.example.com:8000", "[::1]:8000");
    # a port is not valid inside a cookie Domain attribute.
    if host.startswith("["):
        hostname = host[1:].split("]", 1)[0]
    else:
        hostname = host.split(":", 1)[0]

    cookie_domain = None
    if hostname:
        try:
            ipaddress.ip_address(hostname)
        except ValueError:
            labels = hostname.split(".")
            parent = ".".join(labels[-2:]) if len(labels) >= 2 else hostname
            cookie_domain = f".{parent}"
        # else: an IP literal has no parent domain, keep a host-only cookie.

    handler.set_cookie(
        "dataflow_session",
        session_id,
        domain=cookie_domain,
        path="/",
        expires=expires,
        secure=True,
        httponly=True,
        samesite="None",
    )
    # The session ID is a bearer credential, so it is deliberately not logged.
    self.log.info("Set dataflow_session cookie for host=%s", host)
61
+
62
def get_or_create_session(self, user_id):
    """Create a new session row for the user and return its session ID.

    Despite the name, an existing session is never looked up or reused:
    every call generates a fresh ID (regenerating it while a row with the
    same ID already exists) and persists a new row.

    Args:
        user_id: The ID of the user the session belongs to.

    Returns:
        session_id (str): The newly created session ID.
    """
    session_id = self.generate_session_id()
    while (
        self.db.query(m_session.Session)
        .filter(m_session.Session.session_id == session_id)
        .first()
    ):
        session_id = self.generate_session_id()

    db_item = m_session.Session(user_id=user_id, session_id=session_id)
    self.db.add(db_item)
    self.db.commit()
    self.db.refresh(db_item)
    # The session ID is a bearer credential, so only its owner is logged.
    self.log.info("Created new session for user_id=%s", user_id)
    return session_id
85
+
86
def check_blocked_users(self, username, authenticated):
    """Decide whether an authenticated user may proceed.

    A result carrying a ``session_id`` was produced by the Dataflow login
    flow and is accepted directly; anything else is delegated to the parent
    class's check.

    Args:
        username (str): The username of the authenticated user.
        authenticated (dict|None): The authentication data returned from authenticate method.

    Returns:
        username (str|None): The username if not blocked, else None.
    """
    # Only the presence of the payload is logged: it can hold the session ID
    # and OAuth auth_state, neither of which belongs in log files.
    self.log.info(
        "Checking blocked users for %s: authenticated=%s",
        username,
        bool(authenticated),
    )

    if not authenticated:
        self.log.warning(f"No authenticated data for user: {username}")
        return None

    if isinstance(authenticated, dict) and "session_id" in authenticated:
        self.log.info(f"Allowing Dataflow authentication for user: {username}")
        return username

    return super().check_blocked_users(username, authenticated)
108
+
109
def extract_username_from_email(self, email):
    """Return the part of an email address that precedes the first '@'.

    Args:
        email (str): User's email address

    Returns:
        username (str): Text before the first '@', or the input unchanged
            when it contains no '@'
    """
    return email.split('@')[0] if '@' in email else email
123
+
124
def generate_secure_password(self):
    """Produce a random placeholder password value.

    Returns:
        password_hash (str): Hex SHA-256 digest of a random UUID4
            concatenated with a random 16-byte hex salt
    """
    salt = secrets.token_hex(16)
    material = f"{uuid.uuid4()}{salt}"
    return hashlib.sha256(material.encode()).hexdigest()
136
+
137
def create_new_user(self, email, first_name=None, last_name=None):
    """Insert a new user row derived from an email address.

    The email is lower-cased, rejected if its domain is in the blocked
    list, and its local part (reduced to ``a-z0-9``) becomes the username;
    a numeric suffix is appended until the username is unused.

    Args:
        email (str): User's email address
        first_name (str): User's first name; defaults to the username
        last_name (str): User's last name; defaults to an empty string

    Returns:
        new_user (m_user.User|None): Created user object or None if creation failed

    Raises:
        ValueError: with message ``blocked_domain:<domain>`` when the email
            domain is blocked.
    """

    def _lookup(candidate):
        # First user row that already holds this username, if any.
        return (
            self.db.query(m_user.User)
            .filter(m_user.User.user_name == candidate)
            .first()
        )

    try:
        # Normalize email to lowercase for consistency
        email = email.lower()

        # Check if email domain is blocked
        if '@' in email:
            domain = email.split('@')[1].lower()
            if domain in blocked_domains:
                self.log.warning(f"Blocked domain detected: {domain} for email: {email}")
                raise ValueError(f"blocked_domain:{domain}")

        local_part = self.extract_username_from_email(email)
        base_name = re.sub(r'[^a-z0-9]', '', local_part.lower())
        if not base_name:
            self.log.error("Cannot create user: Username is empty")
            return None

        # Try the bare name first, then name1, name2, ... until one is free.
        username = base_name
        suffix = 1
        while _lookup(username):
            username = f"{base_name}{suffix}"
            suffix += 1

        placeholder_password = self.generate_secure_password()
        new_user = m_user.User(
            user_name=username,
            first_name=first_name or username,
            last_name=last_name or "",
            email=email,
            password=placeholder_password,
        )

        self.db.add(new_user)
        self.db.commit()
        self.db.refresh(new_user)

        self.log.info(f"Created new user: {username} with email: {email}")
        return new_user

    except ValueError as e:
        if str(e).startswith("blocked_domain:"):
            # Callers turn the blocked-domain error into a dedicated message.
            raise
        self.log.error(f"ValueError creating new user: {e!s}")
        self.db.rollback()
        return None
    except Exception as e:
        self.log.error(f"Error creating new user: {e!s}")
        self.db.rollback()
        return None
211
+
212
async def authenticate_dataflow(self, handler, data):
    """Authenticate a user with a Dataflow email and one-time password.

    The submitted ``password`` is treated as a numeric OTP and matched
    against an unexpired OTP row for the (lower-cased) email. A matching
    row is deleted, the user is created if no account exists yet, and a
    new session is started and stored in the session cookie.

    Every failure after the initial guard renders an error page through
    ``self.login_error`` before returning None.

    Args:
        handler: The request handler.
        data: The authentication data containing username and password.

    Returns:
        dict|None: Authentication result with username and session_id if successful, else None.
    """
    if not (
        self.enable_dataflow_auth
        and isinstance(data, dict)
        and data.get("username")
        and data.get("password")
    ):
        return None

    user_email = data["username"].lower()  # Normalize email for comparison
    password = data["password"]
    self.log.info(f"Attempting Dataflow authentication for user: {user_email}")

    # OTPs are compared as integers below, so a non-numeric value can never
    # match; report it as a bad OTP instead of an internal error.
    try:
        otp_value = int(password)
    except (TypeError, ValueError):
        self.log.warning(f"Non-numeric OTP submitted for user: {user_email}")
        self.login_error(handler, f"Invalid OTP provided for {user_email}. Please try again.")
        return None

    try:
        # SQL functions come from sqlalchemy.func; the session object used
        # as self.db does not provide a `func` attribute.
        from sqlalchemy import func

        current_time = datetime.now(ZoneInfo("UTC"))
        valid_user = (
            self.db.query(m_otp.UserOtp)
            .filter(
                func.lower(m_otp.UserOtp.email) == user_email,
                m_otp.UserOtp.otp == otp_value,
                m_otp.UserOtp.expires_at > current_time,
            )
            .first()
        )

        user = (
            self.db.query(m_user.User)
            .filter(func.lower(m_user.User.email) == user_email)
            .first()
        )

        if not valid_user:
            if user:
                self.log.warning(f"Invalid OTP for user: {user_email}")
                self.login_error(handler, f"Invalid OTP provided for {user_email}. Please try again.")
            else:
                self.log.warning(f"User not found: {user_email}")
                self.login_error(handler, f"User not found: {user_email}. Please sign up.")
            return None

        # An OTP is single-use: remove it as soon as it has been accepted.
        self.db.delete(valid_user)
        self.db.commit()
        self.log.info(f"OTP validated and deleted for user: {user_email}")

        if not user:
            try:
                db_user = self.create_new_user(user_email)
                if not db_user:
                    self.log.error(f"Failed to create new user for email: {user_email}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None
                user = db_user
            except ValueError as e:
                if str(e).startswith("blocked_domain:"):
                    domain = str(e).split(":", 1)[1]
                    self.log.warning(f"Blocked domain signup attempt: {domain}")
                    self.login_error(
                        handler,
                        f"Users with non-organizational email accounts (like {domain}) can only be created by administrators. Please contact your system administrator for access.",
                        "Registration Restricted",
                    )
                    return None
                self.log.error(f"ValueError during user creation: {str(e)}")
                self.login_error(handler, "Authentication error occurred. Please try again.")
                return None

        session_id = self.get_or_create_session(user.user_id)
        self.set_session_cookie(handler, session_id)
        self.log.info(f"Dataflow authentication successful for user: {user.user_name}")
        return {"name": user.user_name, "session_id": session_id, "auth_state": {}}

    except Exception as e:
        self.log.error(f"Dataflow authentication error: {str(e)}", exc_info=True)
        self.db.rollback()
        self.login_error(handler, "Authentication error occurred. Please try again.")
        return None
294
+
295
+ class DataflowGoogleAuthenticator(DataflowBaseAuthenticator, GoogleOAuthenticator):
296
+
297
+ """Authenticator to handle Google OAuth authentication with Dataflow integration.
298
+
299
+ Overrides
300
+ - DataflowBaseAuthenticator
301
+ - GoogleOAuthenticator
302
+
303
+ Requires Google OAuth credentials.
304
+ - google_client_id
305
+ - google_client_secret
306
+ """
307
+
308
+ dataflow_oauth_type = Unicode(
309
+ default_value="google",
310
+ config=True,
311
+ help="The OAuth provider type for DataflowHub (e.g., github, google)"
312
+ )
313
+ google_client_id = Unicode(config=True, help="Google OAuth client ID")
314
+ google_client_secret = Unicode(config=True, help="Google OAuth client secret")
315
+
316
def __init__(self, **kwargs):
    """Initialize the authenticator and copy the Google credentials onto
    the ``client_id`` / ``client_secret`` attributes.

    Keyword arguments are forwarded unchanged to the parent initializers.
    """
    super().__init__(**kwargs)
    self.client_id = self.google_client_id
    self.client_secret = self.google_client_secret
    # Re-assigns the attribute to its current value.
    self.dataflow_oauth_type = self.dataflow_oauth_type
    details = ", ".join(
        (
            f"google_client_id={self.google_client_id}",
            f"oauth_callback_url={self.oauth_callback_url}",
            f"enable_dataflow_auth={self.enable_dataflow_auth}",
        )
    )
    self.log.info(f"DataflowGoogleAuthenticator initialized with {details}")
324
+
325
def login_error(self, handler, message, title="Authentication Failed"):
    """Render a standalone HTML error page and finish the request with 403.

    ``message`` and ``title`` are HTML-escaped before being placed in the
    page, because callers build them from user-supplied values such as the
    submitted email address.

    Args:
        handler: The request handler used to send the response.
        message: Explanation shown to the user.
        title: Page title and heading.
    """
    from html import escape

    safe_title = escape(str(title))
    safe_message = escape(str(message))
    page = f"""
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{safe_title}</title>
    <link href='https://fonts.googleapis.com/css?family=Lato:400,600' rel='stylesheet'>
    </head>
    <body style="margin: 0; padding: 20px; font-family: 'Lato', Arial, sans-serif; background-color: #f8fafc; min-height: 100vh; display: flex; align-items: center; justify-content: center;">
    <div style="background: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); max-width: 450px; width: 100%; padding: 40px; text-align: center;">
    <div style="width: 60px; height: 60px; background-color: #ffebee; border-radius: 50%; display: flex; align-items: center; justify-content: center; margin: 0 auto 20px; font-size: 30px; color: #d32f2f;">!</div>

    <h1 style="font-size: 24px; font-weight: 600; color: #121926; margin: 0 0 12px 0;">{safe_title}</h1>

    <p style="font-size: 15px; color: #697586; line-height: 1.5; margin: 0 0 24px 0;">{safe_message}</p>

    <a href="/hub/login" style="display: inline-block; padding: 12px 32px; background-color: #30baba; color: white; text-decoration: none; border-radius: 6px; font-size: 14px; font-weight: 600; transition: background-color 0.3s;">Try Again</a>
    </div>
    </body>
    </html>
    """
    handler.set_status(403)
    handler.finish(page)
351
+
352
+
353
+
354
async def authenticate(self, handler, data):
    """Authenticate user using Google OAuth with Dataflow integration.

    A Dataflow email/OTP login is tried first. If that was not attempted,
    the Google OAuth flow of the parent class runs; the returned email is
    matched case-insensitively against existing users, an account is
    created when none exists, and a new session is started.

    Args:
        handler: The request handler.
        data: The authentication data.

    Returns:
        dict|None: Authentication result with username and session_id if successful, else None.
    """
    # The form data may contain the user's OTP, so only the URI is logged.
    self.log.info(f"Authenticate called, request_uri: {handler.request.uri}")

    dataflow_attempt = bool(
        self.enable_dataflow_auth
        and isinstance(data, dict)
        and data.get("username")
        and data.get("password")
    )
    result = await self.authenticate_dataflow(handler, data)
    if result:
        return result
    if dataflow_attempt:
        # authenticate_dataflow already rendered an error page for this
        # request; running the OAuth flow as well would try to respond twice.
        return None

    from sqlalchemy import func

    try:
        user = await super().authenticate(handler, data)
        if not user:
            self.log.warning("Google OAuth authentication failed: No user data returned")
            return None
        # The OAuth result carries tokens in auth_state; log the name only.
        self.log.info(f"Google OAuth authentication succeeded for: {user.get('name')}")

        email = user["name"]

        # create_new_user stores emails lower-cased, so compare that way.
        db_user = (
            self.db.query(m_user.User)
            .filter(func.lower(m_user.User.email) == email.lower())
            .first()
        )

        if not db_user:
            self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
            # Extract additional info from user data if available
            auth_state = user.get("auth_state", {})
            user_info = auth_state.get("user", {}) if auth_state else {}

            # Get name information from Google OAuth response
            full_name = user_info.get("name", "")
            first_name = user_info.get("given_name", "")
            last_name = user_info.get("family_name", "")

            if not first_name and full_name:
                # Fallback: parse full name if given_name is not available
                name_parts = full_name.strip().split(' ', 1)
                first_name = name_parts[0] if name_parts else ""
                last_name = name_parts[1] if len(name_parts) > 1 else ""

            self.log.info(f"Creating user with first_name='{first_name}', last_name='{last_name}'")

            try:
                db_user = self.create_new_user(email, first_name, last_name)
                if not db_user:
                    self.log.error(f"Failed to create new user for email: {email}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None
            except ValueError as e:
                if str(e).startswith("blocked_domain:"):
                    domain = str(e).split(":", 1)[1]
                    self.log.warning(f"Blocked domain signup attempt: {domain}")
                    self.login_error(
                        handler,
                        f"Users with non-organizational email accounts (like {domain}) can only be created by administrators. Please contact your system administrator for access.",
                        "Registration Restricted",
                    )
                    return None
                self.log.error(f"ValueError during user creation: {str(e)}")
                self.login_error(handler, "Authentication error occurred. Please try again.")
                return None

        username = db_user.user_name
        session_id = self.get_or_create_session(db_user.user_id)
        self.set_session_cookie(handler, session_id)
        # The session ID is a bearer credential and is not logged.
        self.log.info(f"Google OAuth completed for user: {username}")
        return {
            "name": username,
            "session_id": session_id,
            "auth_state": user.get("auth_state", {}),
        }
    except Exception as e:
        # Log the details first; the user only sees a generic message so
        # internal error text is not exposed in the response.
        self.log.error(f"Google OAuth authentication error: {str(e)}", exc_info=True)
        self.login_error(handler, "Authentication error occurred. Please try again.")
        return None
    finally:
        self.db.close()
445
+
446
+ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticator):
447
+
448
+ """Authenticator to handle Azure AD OAuth authentication with Dataflow integration.
449
+
450
+ Overrides
451
+ - DataflowBaseAuthenticator
452
+ - AzureAdOAuthenticator
453
+
454
+ Requires Azure AD OAuth credentials.
455
+ - azure_client_id
456
+ - azure_client_secret
457
+ - azure_tenant_id
458
+ """
459
+
460
+ azure_client_id = Unicode(config=True, help="Azure AD OAuth client ID")
461
+ azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
462
+ azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
463
+ azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
464
+ dataflow_oauth_type = Unicode(
465
+ default_value="google",
466
+ config=True,
467
+ help="The OAuth provider type for DataflowHub (e.g., github, google)"
468
+ )
469
def __init__(self, **kwargs):
    """Initialize the authenticator and copy the Azure AD settings onto the
    ``client_id`` / ``client_secret`` / ``tenant_id`` / ``scope`` attributes.

    Keyword arguments are forwarded unchanged to the parent initializers.
    """
    super().__init__(**kwargs)
    self.client_id = self.azure_client_id
    self.client_secret = self.azure_client_secret
    self.tenant_id = self.azure_tenant_id
    # azure_scope is a whitespace-separated string; scope is set to a list.
    self.scope = self.azure_scope.split()
    # Re-assigns the attribute to its current value.
    self.dataflow_oauth_type = self.dataflow_oauth_type
    details = ", ".join(
        (
            f"azure_client_id={self.azure_client_id}",
            f"oauth_callback_url={self.oauth_callback_url}",
            f"enable_dataflow_auth={self.enable_dataflow_auth}",
        )
    )
    self.log.info(f"DataflowAzureAuthenticator initialized with {details}")
479
+
480
def login_error(self, handler, message, title="Authentication Failed"):
    """Render a standalone HTML error page and finish the request with 403.

    ``message`` and ``title`` are HTML-escaped before being placed in the
    page, because callers build them from user-supplied values such as the
    submitted email address.

    Args:
        handler: The request handler used to send the response.
        message: Explanation shown to the user.
        title: Page title and heading.
    """
    from html import escape

    safe_title = escape(str(title))
    safe_message = escape(str(message))
    page = f"""
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{safe_title}</title>
    <link href='https://fonts.googleapis.com/css?family=Lato:400,600' rel='stylesheet'>
    </head>
    <body style="margin: 0; padding: 20px; font-family: 'Lato', Arial, sans-serif; background-color: #f8fafc; min-height: 100vh; display: flex; align-items: center; justify-content: center;">
    <div style="background: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); max-width: 450px; width: 100%; padding: 40px; text-align: center;">
    <div style="width: 60px; height: 60px; background-color: #ffebee; border-radius: 50%; display: flex; align-items: center; justify-content: center; margin: 0 auto 20px; font-size: 30px; color: #d32f2f;">!</div>

    <h1 style="font-size: 24px; font-weight: 600; color: #121926; margin: 0 0 12px 0;">{safe_title}</h1>

    <p style="font-size: 15px; color: #697586; line-height: 1.5; margin: 0 0 24px 0;">{safe_message}</p>

    <a href="/hub/login" style="display: inline-block; padding: 12px 32px; background-color: #30baba; color: white; text-decoration: none; border-radius: 6px; font-size: 14px; font-weight: 600; transition: background-color 0.3s;">Try Again</a>
    </div>
    </body>
    </html>
    """
    handler.set_status(403)
    handler.finish(page)
506
+
507
async def authenticate(self, handler, data):
    """Authenticate user using Azure AD OAuth with Dataflow integration.

    A Dataflow email/OTP login is tried first. If that was not attempted,
    the Azure AD OAuth flow of the parent class runs; the ``upn`` claim is
    used as the email and matched case-insensitively against existing
    users, an account is created when none exists, and a new session is
    started.

    Args:
        handler: The request handler.
        data: The authentication data.

    Returns:
        dict|None: Authentication result with username and session_id if successful, else None.
    """
    dataflow_attempt = bool(
        self.enable_dataflow_auth
        and isinstance(data, dict)
        and data.get("username")
        and data.get("password")
    )
    result = await self.authenticate_dataflow(handler, data)
    if result:
        return result
    if dataflow_attempt:
        # authenticate_dataflow already rendered an error page for this
        # request; running the OAuth flow as well would try to respond twice.
        return None

    from sqlalchemy import func

    try:
        user = await super().authenticate(handler, data)
        if not user:
            self.log.warning("Azure AD OAuth authentication failed: No user data returned")
            return None
        # The OAuth result carries tokens in auth_state; log the name only.
        self.log.info(f"Azure AD OAuth authentication succeeded for: {user.get('name')}")

        auth_state = user.get("auth_state", {})
        user_info = auth_state.get("user", {}) if auth_state else {}
        email = user_info.get("upn")
        if not email:
            self.log.warning("Azure AD OAuth authentication failed: No upn in user data")
            return None

        # create_new_user stores emails lower-cased, so compare that way.
        db_user = (
            self.db.query(m_user.User)
            .filter(func.lower(m_user.User.email) == email.lower())
            .first()
        )

        if not db_user:
            self.log.info(f"User with email {email} not found in Dataflow database, creating new user")

            # Extract name information from Azure AD response
            display_name = (
                user_info.get("displayName", "")
                or user_info.get("name", "")
                or user.get("name", "")
            )
            first_name = user_info.get("givenName", "")
            last_name = user_info.get("surname", "")

            if not first_name and display_name:
                # Fallback: parse display name if givenName is not available
                name_parts = display_name.strip().split(' ', 1)
                first_name = name_parts[0] if name_parts else ""
                last_name = name_parts[1] if len(name_parts) > 1 else ""

            self.log.info(f"Creating user with first_name='{first_name}', last_name='{last_name}'")

            try:
                db_user = self.create_new_user(email, first_name, last_name)
                if not db_user:
                    self.log.error(f"Failed to create new user for email: {email}")
                    self.login_error(handler, "Authentication error occurred. Please try again.")
                    return None
            except ValueError as e:
                if str(e).startswith("blocked_domain:"):
                    domain = str(e).split(":", 1)[1]
                    self.log.warning(f"Blocked domain signup attempt: {domain}")
                    self.login_error(
                        handler,
                        f"Users with non-organizational email accounts (like {domain}) can only be created by administrators. Please contact your system administrator for access.",
                        "Registration Restricted",
                    )
                    return None
                self.log.error(f"ValueError during user creation: {str(e)}")
                self.login_error(handler, "Authentication error occurred. Please try again.")
                return None

        username = db_user.user_name
        session_id = self.get_or_create_session(db_user.user_id)
        self.set_session_cookie(handler, session_id)
        # The session ID is a bearer credential and is not logged.
        self.log.info(f"Azure AD OAuth completed for user: {username}")
        return {
            "name": username,
            "session_id": session_id,
            "auth_state": user.get("auth_state", {}),
        }

    except Exception as e:
        # NOTE(review): no error page is rendered on this path (the
        # login_error call was disabled upstream) - confirm this is intended.
        self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
        return None
    finally:
        self.db.close()
599
+
600
# Pick the OAuth provider from the DATAFLOW_OAUTH_TYPE environment variable;
# any value other than "google" (the default) selects the Azure AD class.
auth_type = os.environ.get("DATAFLOW_OAUTH_TYPE", "google")

BaseAuthenticator = (
    DataflowGoogleAuthenticator
    if auth_type == "google"
    else DataflowAzureAuthenticator
)
606
+
607
class DataflowHubAuthenticator(BaseAuthenticator):
    """Hub authenticator; adds nothing to the module-level BaseAuthenticator it subclasses."""
@@ -12,6 +12,7 @@ from superset.security import SupersetSecurityManager
12
12
  from dataflow.dataflow import Dataflow
13
13
 
14
14
  class DataflowAuthDBView(AuthDBView):
15
+
15
16
  def __init__(self):
16
17
  self.dataflow = Dataflow()
17
18
 
@@ -33,14 +34,15 @@ class DataflowAuthDBView(AuthDBView):
33
34
  @expose('/login/', methods=['GET', "POST"])
34
35
  def login(self):
35
36
  """
36
- Handles both GET and POST login requests.
37
+ This method handles authentication for superset in Dataflow.
37
38
 
38
- - GET: Used for browser-based login. Authenticates using session cookie and redirects to home.
39
- - POST: Used for API-based login. Returns JWT access token for programmatic access.
39
+ Methods:
40
+ - GET:
41
+ Used for browser-based login. Authenticates using session cookie and redirects to home.
40
42
 
41
- Returns:
42
- - Redirect to home page (GET)
43
- - JSON response with access token (POST)
43
+ - POST:
44
+ Used for API-based login. Returns JWT access token for programmatic access.
45
+ Returns JSON response with access token
44
46
  """
45
47
  if request.method == "GET":
46
48
  session_id = request.cookies.get('dataflow_session')
@@ -71,6 +73,9 @@ class DataflowAuthDBView(AuthDBView):
71
73
  return jsonify(resp)
72
74
 
73
75
  class DataflowSecurityManager(SupersetSecurityManager):
76
+
77
+ """Custom Security Manager integrating Dataflow authentication with superset."""
78
+
74
79
  authdbview = DataflowAuthDBView
75
80
  def __init__(self, appbuilder):
76
81
  super(DataflowSecurityManager, self).__init__(appbuilder)