dataflow-core 2.1.1__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dataflow-core might be problematic. Click here for more details.

@@ -1,83 +1,322 @@
1
- from dataflow.db import get_db
2
- from dataflow.models import (
3
- user as m_user,
4
- session as m_session
5
- )
6
- from datetime import datetime, timedelta, timezone
1
+ import os
7
2
  import uuid
3
+ import re
4
+ from datetime import datetime, timedelta
5
+ from zoneinfo import ZoneInfo
6
+ from traitlets import Bool, Unicode
8
7
  from jupyterhub.auth import Authenticator
8
+ from oauthenticator.google import GoogleOAuthenticator
9
+ from oauthenticator.azuread import AzureAdOAuthenticator
10
+ from dataflow.db import get_db
11
+ from dataflow.models import user as m_user, session as m_session, role as m_role
12
+
13
+ class DataflowBaseAuthenticator(Authenticator):
14
+ enable_dataflow_auth = Bool(True, config=True, help="Enable username/password authentication")
9
15
 
10
- class DataflowHubAuthenticator(Authenticator):
11
16
  def __init__(self, **kwargs):
12
17
  super().__init__(**kwargs)
13
- self.db = next(get_db())
14
-
18
+ try:
19
+ self.db = next(get_db())
20
+ m_user.Base.metadata.create_all(bind=self.db.get_bind(), checkfirst=True)
21
+ m_session.Base.metadata.create_all(bind=self.db.get_bind(), checkfirst=True)
22
+ self.log.info("Dataflow database initialized successfully")
23
+ except Exception as e:
24
+ self.log.error(f"Failed to initialize Dataflow database: {str(e)}")
25
+ raise
26
+
15
27
  def generate_session_id(self):
16
28
  return str(uuid.uuid4())
29
+
30
+ def set_session_cookie(self, handler, session_id):
31
+ expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=365)
32
+ host = handler.request.host
33
+ domain = '.'.join(host.split('.')[-2:]) if len(host.split('.')) >= 2 else host
34
+ handler.set_cookie(
35
+ "dataflow_session",
36
+ session_id,
37
+ domain=f".{domain}",
38
+ path="/",
39
+ expires=expires,
40
+ secure=True,
41
+ httponly=True,
42
+ samesite="None"
43
+ )
44
+ self.log.info(f"Set session cookie: dataflow_session={session_id} for host={host}")
45
+
46
+ def get_or_create_session(self, user_id):
47
+ existing_session = (
48
+ self.db.query(m_session.Session)
49
+ .filter(m_session.Session.user_id == str(user_id))
50
+ .first()
51
+ )
52
+ if existing_session:
53
+ self.log.info(f"Reusing existing session: {existing_session.session_id}")
54
+ return existing_session.session_id
55
+ session_id = self.generate_session_id()
56
+ while self.db.query(m_session.Session).filter(
57
+ m_session.Session.session_id == session_id
58
+ ).first():
59
+ session_id = self.generate_session_id()
60
+ db_item = m_session.Session(user_id=user_id, session_id=session_id)
61
+ self.db.add(db_item)
62
+ self.db.commit()
63
+ self.db.refresh(db_item)
64
+ self.log.info(f"Created new session: {session_id}")
65
+ return session_id
17
66
 
18
- async def authenticate(self, handler, data):
19
- # get username and password
20
- username = data["username"]
21
- password = data["password"]
67
+ def check_blocked_users(self, username, authenticated):
68
+ self.log.info(f"Checking blocked users for {username}: authenticated={authenticated}, allowed_users={self.allowed_users}")
69
+
70
+ if not authenticated:
71
+ self.log.warning(f"No authenticated data for user: {username}")
72
+ return None
73
+
74
+ if isinstance(authenticated, dict) and "session_id" in authenticated:
75
+ self.log.info(f"Allowing Dataflow authentication for user: {username}")
76
+ return username
77
+
78
+ return super().check_blocked_users(username, authenticated)
22
79
 
80
+ def get_applicant_role_id(self):
81
+ """Get the role ID for 'Applicant' role"""
23
82
  try:
24
- # check if user exists
25
- query = self.db.query(m_user.User)
26
- user = query.filter(m_user.User.user_name == username).first()
83
+ applicant_role = (
84
+ self.db.query(m_role.Role)
85
+ .filter(m_role.Role.name == "Applicant")
86
+ .first()
87
+ )
88
+ if applicant_role:
89
+ return applicant_role.id
90
+ else:
91
+ self.log.warning("Applicant role not found in database")
92
+ return None
93
+ except Exception as e:
94
+ self.log.error(f"Error getting Applicant role: {str(e)}")
95
+ return None
96
+
97
+ def extract_username_from_email(self, email):
98
+ """Extract username from email by removing domain"""
99
+ if '@' in email:
100
+ return email.split('@')[0]
101
+ return email
27
102
 
28
- if user is None or user.password != password:
103
+ def create_new_user(self, email, first_name=None, last_name=None):
104
+ """Create a new user with Applicant role"""
105
+ try:
106
+ role_id = self.get_applicant_role_id()
107
+ if not role_id:
108
+ self.log.error("Cannot create user: Applicant role not found")
29
109
  return None
30
110
 
31
- # Check if the user already has an existing session
32
- existing_session = (
33
- self.db.query(m_session.Session)
34
- .filter(m_session.Session.user_id == user.user_id)
111
+ username = self.extract_username_from_email(email)
112
+ username = re.sub(r'[^A-Za-z0-9]', '', username)
113
+ if not username:
114
+ self.log.error("Cannot create user: Username is empty")
115
+ return None
116
+ existing_user = (
117
+ self.db.query(m_user.User)
118
+ .filter(m_user.User.user_name == username)
35
119
  .first()
36
120
  )
121
+ if existing_user:
122
+ counter = 1
123
+ original_username = username
124
+ while existing_user:
125
+ username = f"{original_username}_{counter}"
126
+ existing_user = (
127
+ self.db.query(m_user.User)
128
+ .filter(m_user.User.user_name == username)
129
+ .first()
130
+ )
131
+ counter += 1
37
132
 
38
- if existing_session:
39
- # Reuse the existing session_id
40
- session_id = existing_session.session_id
41
- else:
42
- # Generate a new session_id
43
- session_id = self.generate_session_id()
44
- query = self.db.query(m_session.Session)
45
- isSession = query.filter(m_session.Session.session_id == session_id).first()
46
-
47
- # If session_id(uuid string) already exists in the database, generate a new one
48
- while isSession is not None:
49
- session_id = self.generate_session_id()
50
- isSession = query.filter(m_session.Session.session_id == session_id).first()
51
-
52
- # add session_id to the database
53
- db_item = m_session.Session(user_id=user.user_id, session_id=session_id)
54
- self.db.add(db_item)
55
- self.db.commit()
56
- self.db.refresh(db_item)
57
-
58
- expires = datetime.now(timezone.utc) + timedelta(days=365)
59
- host = handler.request.host
60
- parts = host.split('.')
61
- if len(parts) >= 2:
62
- domain = '.'.join(parts[-2:])
63
- else:
64
- domain = host
65
- base_domain = f".{domain}"
66
- handler.set_cookie(
67
- "dataflow_session",
68
- session_id,
69
- domain=base_domain,
70
- path="/",
71
- expires=expires,
72
- secure=True,
73
- httponly=True,
74
- samesite="None"
133
+ new_user = m_user.User(
134
+ user_name=username,
135
+ first_name=first_name or username,
136
+ last_name=last_name or "",
137
+ email=email,
138
+ role_id=role_id,
139
+ active='Y',
140
+ password='user@123',
141
+ )
142
+
143
+ self.db.add(new_user)
144
+ self.db.commit()
145
+ self.db.refresh(new_user)
146
+
147
+ self.log.info(f"Created new user: {username} with email: {email}")
148
+ return new_user
149
+
150
+ except Exception as e:
151
+ self.log.error(f"Error creating new user: {str(e)}")
152
+ self.db.rollback()
153
+ return None
154
+
155
+ async def authenticate_dataflow(self, handler, data):
156
+ if not (self.enable_dataflow_auth and isinstance(data, dict) and data.get("username") and data.get("password")):
157
+ return None
158
+ username = data["username"]
159
+ password = data["password"]
160
+ self.log.info(f"Attempting Dataflow authentication for user: {username}")
161
+ try:
162
+ user = (
163
+ self.db.query(m_user.User)
164
+ .filter(m_user.User.user_name == username)
165
+ .first()
166
+ )
167
+ if not user or user.password != password:
168
+ self.log.warning(f"Dataflow authentication failed for user: {username}")
169
+ return None
170
+ session_id = self.get_or_create_session(user.user_id)
171
+ self.set_session_cookie(handler, session_id)
172
+ self.log.info(f"Dataflow authentication successful for user: {username}")
173
+ return {"name": username, "session_id": session_id, "auth_state": {}}
174
+ except Exception as e:
175
+ self.log.error(f"Dataflow authentication error: {str(e)}")
176
+ return None
177
+ finally:
178
+ self.db.close()
179
+
180
+ class DataflowGoogleAuthenticator(DataflowBaseAuthenticator, GoogleOAuthenticator):
181
+ dataflow_oauth_type = Unicode(
182
+ default_value="google",
183
+ config=True,
184
+ help="The OAuth provider type for DataflowHub (e.g., github, google)"
185
+ )
186
+ google_client_id = Unicode(config=True, help="Google OAuth client ID")
187
+ google_client_secret = Unicode(config=True, help="Google OAuth client secret")
188
+
189
+ def __init__(self, **kwargs):
190
+ super().__init__(**kwargs)
191
+ self.client_id = self.google_client_id
192
+ self.client_secret = self.google_client_secret
193
+ self.dataflow_oauth_type = self.dataflow_oauth_type
194
+ self.log.info(f"DataflowGoogleAuthenticator initialized with google_client_id={self.google_client_id}, "
195
+ f"oauth_callback_url={self.oauth_callback_url}, "
196
+ f"enable_dataflow_auth={self.enable_dataflow_auth}")
197
+
198
+ async def authenticate(self, handler, data):
199
+ self.log.info(f"Authenticate called with data: {data}, request_uri: {handler.request.uri}")
200
+ result = await self.authenticate_dataflow(handler, data)
201
+ if result:
202
+ return result
203
+ try:
204
+ user = await super().authenticate(handler, data)
205
+ self.log.info(f"Google OAuth authentication returned: {user}")
206
+ if not user:
207
+ self.log.warning("Google OAuth authentication failed: No user data returned")
208
+ return None
209
+
210
+ email = user["name"]
211
+ db_user = (
212
+ self.db.query(m_user.User)
213
+ .filter(m_user.User.email == email)
214
+ .first()
75
215
  )
76
- user_dict = {"name": username, "session_id": session_id}
77
- return user_dict
216
+
217
+ if not db_user:
218
+ self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
219
+ # Extract additional info from user data if available
220
+ auth_state = user.get("auth_state", {})
221
+ user_info = auth_state.get("user", {}) if auth_state else {}
222
+
223
+ first_name = user_info.get("name")
224
+ last_name = user_info.get("last_name")
225
+
226
+ db_user = self.create_new_user(email, first_name, last_name)
227
+ if not db_user:
228
+ self.log.error(f"Failed to create new user for email: {email}")
229
+ return None
230
+
231
+ username = db_user.user_name
232
+ session_id = self.get_or_create_session(db_user.user_id)
233
+ self.set_session_cookie(handler, session_id)
234
+ self.log.info(f"Google OAuth completed for user: {username}, session_id={session_id}")
235
+ return {
236
+ "name": username,
237
+ "session_id": session_id,
238
+ "auth_state": user.get("auth_state", {})
239
+ }
240
+ except Exception as e:
241
+ self.log.error(f"Google OAuth authentication error: {str(e)}", exc_info=True)
242
+ return None
243
+ finally:
244
+ self.db.close()
245
+
246
+ class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticator):
247
+ azure_client_id = Unicode(config=True, help="Azure AD OAuth client ID")
248
+ azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
249
+ azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
250
+ azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
78
251
 
252
+ def __init__(self, **kwargs):
253
+ super().__init__(**kwargs)
254
+ self.client_id = self.azure_client_id
255
+ self.client_secret = self.azure_client_secret
256
+ self.tenant_id = self.azure_tenant_id
257
+ self.scope = self.azure_scope.split()
258
+ self.dataflow_oauth_type = self.dataflow_oauth_type
259
+ self.log.info(f"DataflowAzureAuthenticator initialized with azure_client_id={self.azure_client_id}, "
260
+ f"oauth_callback_url={self.oauth_callback_url}, "
261
+ f"enable_dataflow_auth={self.enable_dataflow_auth}")
262
+
263
+ async def authenticate(self, handler, data):
264
+ result = await self.authenticate_dataflow(handler, data)
265
+ if result:
266
+ return result
267
+ try:
268
+ user = await super().authenticate(handler, data)
269
+ self.log.info(f"Azure AD OAuth authentication returned: {user}")
270
+ if not user:
271
+ self.log.warning("Azure AD OAuth authentication failed: No user data returned")
272
+ return None
273
+
274
+ email = user.get("email") or user.get("preferred_username")
275
+ if not email:
276
+ self.log.warning("Azure AD OAuth authentication failed: No email in user data")
277
+ return None
278
+
279
+ db_user = (
280
+ self.db.query(m_user.User)
281
+ .filter(m_user.User.email == email)
282
+ .first()
283
+ )
284
+
285
+ if not db_user:
286
+ self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
287
+ # Extract additional info from user data if available
288
+ auth_state = user.get("auth_state", {})
289
+ user_info = auth_state.get("user", {}) if auth_state else {}
290
+
291
+ first_name = user_info.get("given_name") or user.get("given_name")
292
+ last_name = user_info.get("family_name") or user.get("family_name")
293
+
294
+ db_user = self.create_new_user(email, first_name, last_name)
295
+ if not db_user:
296
+ self.log.error(f"Failed to create new user for email: {email}")
297
+ return None
298
+
299
+ username = db_user.user_name
300
+ session_id = self.get_or_create_session(db_user.user_id)
301
+ self.set_session_cookie(handler, session_id)
302
+ self.log.info(f"Azure AD OAuth completed for user: {username}, session_id={session_id}")
303
+ return {
304
+ "name": username,
305
+ "session_id": session_id,
306
+ "auth_state": user.get("auth_state", {})
307
+ }
79
308
  except Exception as e:
309
+ self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
80
310
  return None
81
-
82
311
  finally:
83
- self.db.close()
312
+ self.db.close()
313
+
314
+ auth_type = os.environ.get("DATAFLOW_AUTH_TYPE", "google")
315
+
316
+ if auth_type == "google":
317
+ BaseAuthenticator = DataflowGoogleAuthenticator
318
+ else:
319
+ BaseAuthenticator = DataflowAzureAuthenticator
320
+
321
+ class DataflowHubAuthenticator(BaseAuthenticator):
322
+ pass
dataflow/models/role.py CHANGED
@@ -13,7 +13,7 @@ class Role(Base):
13
13
  id = Column(Integer, primary_key=True, index=True, autoincrement=True, nullable=False)
14
14
  name = Column(String, unique=True, nullable=False)
15
15
  description = Column(String, nullable=True)
16
- base_role = Column(Enum('admin', 'user', name='base_role_field'), default='user', nullable=False)
16
+ base_role = Column(Enum('admin', 'user', 'applicant', name='base_role_field'), default='user', nullable=False)
17
17
 
18
18
  users = relationship("User", back_populates="role_details", cascade="all, delete-orphan")
19
19
  role_server_assocs = relationship("RoleServer", back_populates="role")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataflow-core
3
- Version: 2.1.1
3
+ Version: 2.1.2
4
4
  Summary: Dataflow core package
5
5
  Author: Dataflow
6
6
  Author-email:
@@ -1,6 +1,6 @@
1
1
  authenticator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  authenticator/dataflowairflowauthenticator.py,sha256=gEdCiL2yJQ7lYvAwbrjcAkccVMfehoMJldw9eU7cc2s,2243
3
- authenticator/dataflowhubauthenticator.py,sha256=A0S0_PJVCPiTfuoLormmO0xNDqjIh00Cf93P0-mF0cw,2913
3
+ authenticator/dataflowhubauthenticator.py,sha256=-wFEPEQfgCgtghC0Eo9e18B-OU1JsPKam0tbQaYCg1s,13563
4
4
  authenticator/dataflowsupersetauthenticator.py,sha256=NkAmDaIc-ui-qEolu4xz_UY7P_2g8111hwNjPvAOW1Q,2839
5
5
  dataflow/__init__.py,sha256=WTRg8HMpMWSgxYJ9ZGVldx4k07fAbta3mBmZ1hG9mWE,30
6
6
  dataflow/configuration.py,sha256=7To6XwH1eESiYp39eqPcswXWwrdBUdPF6xN6WnazOF0,663
@@ -19,7 +19,7 @@ dataflow/models/pinned_projects.py,sha256=rkpPX_f2U9HjmrRo7_K8rnZIeXuQKGq6hYTrtL
19
19
  dataflow/models/project_details.py,sha256=94wTygXv9iGB0w8g_6vtkB5ZqIzpEv1W9uWwCA4hM0Y,1078
20
20
  dataflow/models/recent_project_studio.py,sha256=m12KGCsv453C1ijHjfVD8E7cJ7Og_0N8uc7_9VlfkYw,812
21
21
  dataflow/models/recent_projects.py,sha256=QqDlk3ll7tBaQl5hqvRarlB9_SUBuN44muLIuTVbPe0,301
22
- dataflow/models/role.py,sha256=i068kzy1UYfl2XNPGl8O4h-1Dcpxb_zzrbI9IW2FAxI,680
22
+ dataflow/models/role.py,sha256=_I5F2TFox_k2-LGgjVuO2PIW9-gpwpoX8Te7m024A8k,693
23
23
  dataflow/models/role_server.py,sha256=mMcfjsGX1cY8hOAOBBmrZgw8ozdfuvjKJoBlR6F0Kdc,689
24
24
  dataflow/models/runtime.py,sha256=OiuBfZTMg81U10GS00DxfhiAmHlcyQUw5LBR8RaPl7s,415
25
25
  dataflow/models/server_config.py,sha256=GTMtQfgtuvKUbxV16VhEpKGhYoNISFLRWdUPqBJmYbM,1365
@@ -36,8 +36,8 @@ dataflow/utils/aws_secrets_manager.py,sha256=A_fNs9VNah9dDdl9NhqizJamYU7xr2v_GXl
36
36
  dataflow/utils/get_current_user.py,sha256=akjcUyTpmMdAZj9LFGSTs76hjBRjltNk9hLUqC_BdkA,1140
37
37
  dataflow/utils/json_handler.py,sha256=5_7WdypegRBDe2HSqBXyrJAdd92wsha8qRcmQvCj1TA,782
38
38
  dataflow/utils/logger.py,sha256=7BFrOq5Oiqn8P4XZbgJzMP5O07d2fpdECbbfsjrUuHw,1213
39
- dataflow_core-2.1.1.dist-info/METADATA,sha256=WdN8Xh9UGcvgYnv-i-CB9fANkjBlUjcw_NawSc2_EcY,301
40
- dataflow_core-2.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
- dataflow_core-2.1.1.dist-info/entry_points.txt,sha256=ppj_EIbYrJJwCPg1kfdsZk5q1N-Ejfis1neYrnjhO8o,117
42
- dataflow_core-2.1.1.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
43
- dataflow_core-2.1.1.dist-info/RECORD,,
39
+ dataflow_core-2.1.2.dist-info/METADATA,sha256=4eTzui9zX33CbX6cQdj1p2uzZIq6OMD-ex_mAWzEY2E,301
40
+ dataflow_core-2.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
+ dataflow_core-2.1.2.dist-info/entry_points.txt,sha256=ppj_EIbYrJJwCPg1kfdsZk5q1N-Ejfis1neYrnjhO8o,117
42
+ dataflow_core-2.1.2.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
43
+ dataflow_core-2.1.2.dist-info/RECORD,,