dataflow-core 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- authenticator/dataflowhubauthenticator.py +301 -62
- dataflow/environment.py +319 -133
- dataflow/models/environment.py +5 -6
- dataflow/models/environment_status.py +1 -1
- dataflow/models/role.py +1 -1
- dataflow/scripts/clone_environment.sh +2 -0
- dataflow/scripts/create_environment.sh +18 -6
- dataflow/scripts/update_environment.sh +36 -0
- {dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/METADATA +1 -1
- {dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/RECORD +13 -12
- {dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/WHEEL +0 -0
- {dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/entry_points.txt +0 -0
- {dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/top_level.txt +0 -0
authenticator/dataflowhubauthenticator.py
CHANGED

@@ -1,83 +1,322 @@
-…
-from dataflow.models import (
-    user as m_user,
-    session as m_session
-)
-from datetime import datetime, timedelta, timezone
+import os
 import uuid
+import re
+from datetime import datetime, timedelta
+from zoneinfo import ZoneInfo
+from traitlets import Bool, Unicode
 from jupyterhub.auth import Authenticator
+from oauthenticator.google import GoogleOAuthenticator
+from oauthenticator.azuread import AzureAdOAuthenticator
+from dataflow.db import get_db
+from dataflow.models import user as m_user, session as m_session, role as m_role
+
+class DataflowBaseAuthenticator(Authenticator):
+    enable_dataflow_auth = Bool(True, config=True, help="Enable username/password authentication")
 
-class DataflowHubAuthenticator(Authenticator):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
-… (old lines 13-14)
+        try:
+            self.db = next(get_db())
+            m_user.Base.metadata.create_all(bind=self.db.get_bind(), checkfirst=True)
+            m_session.Base.metadata.create_all(bind=self.db.get_bind(), checkfirst=True)
+            self.log.info("Dataflow database initialized successfully")
+        except Exception as e:
+            self.log.error(f"Failed to initialize Dataflow database: {str(e)}")
+            raise
+
     def generate_session_id(self):
         return str(uuid.uuid4())
+
+    def set_session_cookie(self, handler, session_id):
+        expires = datetime.now(ZoneInfo("UTC")) + timedelta(days=365)
+        host = handler.request.host
+        domain = '.'.join(host.split('.')[-2:]) if len(host.split('.')) >= 2 else host
+        handler.set_cookie(
+            "dataflow_session",
+            session_id,
+            domain=f".{domain}",
+            path="/",
+            expires=expires,
+            secure=True,
+            httponly=True,
+            samesite="None"
+        )
+        self.log.info(f"Set session cookie: dataflow_session={session_id} for host={host}")
+
+    def get_or_create_session(self, user_id):
+        existing_session = (
+            self.db.query(m_session.Session)
+            .filter(m_session.Session.user_id == str(user_id))
+            .first()
+        )
+        if existing_session:
+            self.log.info(f"Reusing existing session: {existing_session.session_id}")
+            return existing_session.session_id
+        session_id = self.generate_session_id()
+        while self.db.query(m_session.Session).filter(
+            m_session.Session.session_id == session_id
+        ).first():
+            session_id = self.generate_session_id()
+        db_item = m_session.Session(user_id=user_id, session_id=session_id)
+        self.db.add(db_item)
+        self.db.commit()
+        self.db.refresh(db_item)
+        self.log.info(f"Created new session: {session_id}")
+        return session_id
 
-… (old lines 18-21)
+    def check_blocked_users(self, username, authenticated):
+        self.log.info(f"Checking blocked users for {username}: authenticated={authenticated}, allowed_users={self.allowed_users}")
+
+        if not authenticated:
+            self.log.warning(f"No authenticated data for user: {username}")
+            return None
+
+        if isinstance(authenticated, dict) and "session_id" in authenticated:
+            self.log.info(f"Allowing Dataflow authentication for user: {username}")
+            return username
+
+        return super().check_blocked_users(username, authenticated)
 
+    def get_applicant_role_id(self):
+        """Get the role ID for 'Applicant' role"""
         try:
-… (old lines 24-26)
+            applicant_role = (
+                self.db.query(m_role.Role)
+                .filter(m_role.Role.name == "Applicant")
+                .first()
+            )
+            if applicant_role:
+                return applicant_role.id
+            else:
+                self.log.warning("Applicant role not found in database")
+                return None
+        except Exception as e:
+            self.log.error(f"Error getting Applicant role: {str(e)}")
+            return None
+
+    def extract_username_from_email(self, email):
+        """Extract username from email by removing domain"""
+        if '@' in email:
+            return email.split('@')[0]
+        return email
 
-…
+    def create_new_user(self, email, first_name=None, last_name=None):
+        """Create a new user with Applicant role"""
+        try:
+            role_id = self.get_applicant_role_id()
+            if not role_id:
+                self.log.error("Cannot create user: Applicant role not found")
                 return None
 
-… (old lines 31-34)
+            username = self.extract_username_from_email(email)
+            username = re.sub(r'[^A-Za-z0-9]', '', username)
+            if not username:
+                self.log.error("Cannot create user: Username is empty")
+                return None
+            existing_user = (
+                self.db.query(m_user.User)
+                .filter(m_user.User.user_name == username)
                 .first()
             )
+            if existing_user:
+                counter = 1
+                original_username = username
+                while existing_user:
+                    username = f"{original_username}_{counter}"
+                    existing_user = (
+                        self.db.query(m_user.User)
+                        .filter(m_user.User.user_name == username)
+                        .first()
+                    )
+                    counter += 1
 
-… (old lines 38-74)
+            new_user = m_user.User(
+                user_name=username,
+                first_name=first_name or username,
+                last_name=last_name or "",
+                email=email,
+                role_id=role_id,
+                active='Y',
+                password='user@123',
+            )
+
+            self.db.add(new_user)
+            self.db.commit()
+            self.db.refresh(new_user)
+
+            self.log.info(f"Created new user: {username} with email: {email}")
+            return new_user
+
+        except Exception as e:
+            self.log.error(f"Error creating new user: {str(e)}")
+            self.db.rollback()
+            return None
+
+    async def authenticate_dataflow(self, handler, data):
+        if not (self.enable_dataflow_auth and isinstance(data, dict) and data.get("username") and data.get("password")):
+            return None
+        username = data["username"]
+        password = data["password"]
+        self.log.info(f"Attempting Dataflow authentication for user: {username}")
+        try:
+            user = (
+                self.db.query(m_user.User)
+                .filter(m_user.User.user_name == username)
+                .first()
+            )
+            if not user or user.password != password:
+                self.log.warning(f"Dataflow authentication failed for user: {username}")
+                return None
+            session_id = self.get_or_create_session(user.user_id)
+            self.set_session_cookie(handler, session_id)
+            self.log.info(f"Dataflow authentication successful for user: {username}")
+            return {"name": username, "session_id": session_id, "auth_state": {}}
+        except Exception as e:
+            self.log.error(f"Dataflow authentication error: {str(e)}")
+            return None
+        finally:
+            self.db.close()
+
+class DataflowGoogleAuthenticator(DataflowBaseAuthenticator, GoogleOAuthenticator):
+    dataflow_oauth_type = Unicode(
+        default_value="google",
+        config=True,
+        help="The OAuth provider type for DataflowHub (e.g., github, google)"
+    )
+    google_client_id = Unicode(config=True, help="Google OAuth client ID")
+    google_client_secret = Unicode(config=True, help="Google OAuth client secret")
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.client_id = self.google_client_id
+        self.client_secret = self.google_client_secret
+        self.dataflow_oauth_type = self.dataflow_oauth_type
+        self.log.info(f"DataflowGoogleAuthenticator initialized with google_client_id={self.google_client_id}, "
+                      f"oauth_callback_url={self.oauth_callback_url}, "
+                      f"enable_dataflow_auth={self.enable_dataflow_auth}")
+
+    async def authenticate(self, handler, data):
+        self.log.info(f"Authenticate called with data: {data}, request_uri: {handler.request.uri}")
+        result = await self.authenticate_dataflow(handler, data)
+        if result:
+            return result
+        try:
+            user = await super().authenticate(handler, data)
+            self.log.info(f"Google OAuth authentication returned: {user}")
+            if not user:
+                self.log.warning("Google OAuth authentication failed: No user data returned")
+                return None
+
+            email = user["name"]
+            db_user = (
+                self.db.query(m_user.User)
+                .filter(m_user.User.email == email)
+                .first()
             )
-… (old lines 76-77)
+
+            if not db_user:
+                self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
+                # Extract additional info from user data if available
+                auth_state = user.get("auth_state", {})
+                user_info = auth_state.get("user", {}) if auth_state else {}
+
+                first_name = user_info.get("name")
+                last_name = user_info.get("last_name")
+
+                db_user = self.create_new_user(email, first_name, last_name)
+                if not db_user:
+                    self.log.error(f"Failed to create new user for email: {email}")
+                    return None
+
+            username = db_user.user_name
+            session_id = self.get_or_create_session(db_user.user_id)
+            self.set_session_cookie(handler, session_id)
+            self.log.info(f"Google OAuth completed for user: {username}, session_id={session_id}")
+            return {
+                "name": username,
+                "session_id": session_id,
+                "auth_state": user.get("auth_state", {})
+            }
+        except Exception as e:
+            self.log.error(f"Google OAuth authentication error: {str(e)}", exc_info=True)
+            return None
+        finally:
+            self.db.close()
+
+class DataflowAzureAuthenticator(DataflowBaseAuthenticator, AzureAdOAuthenticator):
+    azure_client_id = Unicode(config=True, help="Azure AD OAuth client ID")
+    azure_client_secret = Unicode(config=True, help="Azure AD OAuth client secret")
+    azure_tenant_id = Unicode(config=True, help="Azure AD tenant ID")
+    azure_scope = Unicode("openid profile email", config=True, help="Azure AD OAuth scopes")
 
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.client_id = self.azure_client_id
+        self.client_secret = self.azure_client_secret
+        self.tenant_id = self.azure_tenant_id
+        self.scope = self.azure_scope.split()
+        self.dataflow_oauth_type = self.dataflow_oauth_type
+        self.log.info(f"DataflowAzureAuthenticator initialized with azure_client_id={self.azure_client_id}, "
+                      f"oauth_callback_url={self.oauth_callback_url}, "
+                      f"enable_dataflow_auth={self.enable_dataflow_auth}")
+
+    async def authenticate(self, handler, data):
+        result = await self.authenticate_dataflow(handler, data)
+        if result:
+            return result
+        try:
+            user = await super().authenticate(handler, data)
+            self.log.info(f"Azure AD OAuth authentication returned: {user}")
+            if not user:
+                self.log.warning("Azure AD OAuth authentication failed: No user data returned")
+                return None
+
+            email = user.get("email") or user.get("preferred_username")
+            if not email:
+                self.log.warning("Azure AD OAuth authentication failed: No email in user data")
+                return None
+
+            db_user = (
+                self.db.query(m_user.User)
+                .filter(m_user.User.email == email)
+                .first()
+            )
+
+            if not db_user:
+                self.log.info(f"User with email {email} not found in Dataflow database, creating new user")
+                # Extract additional info from user data if available
+                auth_state = user.get("auth_state", {})
+                user_info = auth_state.get("user", {}) if auth_state else {}
+
+                first_name = user_info.get("given_name") or user.get("given_name")
+                last_name = user_info.get("family_name") or user.get("family_name")
+
+                db_user = self.create_new_user(email, first_name, last_name)
+                if not db_user:
+                    self.log.error(f"Failed to create new user for email: {email}")
+                    return None
+
+            username = db_user.user_name
+            session_id = self.get_or_create_session(db_user.user_id)
+            self.set_session_cookie(handler, session_id)
+            self.log.info(f"Azure AD OAuth completed for user: {username}, session_id={session_id}")
+            return {
+                "name": username,
+                "session_id": session_id,
+                "auth_state": user.get("auth_state", {})
+            }
         except Exception as e:
+            self.log.error(f"Azure AD OAuth authentication error: {str(e)}", exc_info=True)
             return None
-…
         finally:
-            self.db.close()
+            self.db.close()
+
+auth_type = os.environ.get("DATAFLOW_AUTH_TYPE", "google")
+
+if auth_type == "google":
+    BaseAuthenticator = DataflowGoogleAuthenticator
+else:
+    BaseAuthenticator = DataflowAzureAuthenticator
+
+class DataflowHubAuthenticator(BaseAuthenticator):
+    pass
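For orientation: the rewritten module picks the OAuth base class once, at import time, from the DATAFLOW_AUTH_TYPE environment variable. A minimal sketch of how this might be wired into a jupyterhub_config.py follows; the traitlet names and the module path come from the diff and the RECORD above, while the values and overall config layout are illustrative assumptions, not something the package documents.

    # Hypothetical jupyterhub_config.py sketch. Traitlet names are taken from
    # the diff above; all values shown here are placeholders.
    import os

    # Must be set before the module is imported, because the base class is
    # chosen at import time from DATAFLOW_AUTH_TYPE.
    os.environ.setdefault("DATAFLOW_AUTH_TYPE", "google")

    c.JupyterHub.authenticator_class = (
        "authenticator.dataflowhubauthenticator.DataflowHubAuthenticator"
    )

    c.DataflowGoogleAuthenticator.enable_dataflow_auth = True  # username/password fallback
    c.DataflowGoogleAuthenticator.google_client_id = "<client-id>"          # placeholder
    c.DataflowGoogleAuthenticator.google_client_secret = "<client-secret>"  # placeholder
    c.DataflowGoogleAuthenticator.oauth_callback_url = "https://hub.example.com/hub/oauth_callback"

Because the provider is fixed when the module is first imported, changing DATAFLOW_AUTH_TYPE after JupyterHub has loaded the authenticator has no effect.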
dataflow/environment.py
CHANGED

@@ -1,18 +1,21 @@
-import os, shutil, subprocess, datetime
+import os, shutil, subprocess, datetime, yaml, re
 from .models.environment import JobLogs, Environment
 import json, asyncio, pkg_resources
 from sqlalchemy.orm import Session
 from .configuration import ConfigurationManager
+from .utils.logger import CustomLogger
 
 class EnvironmentManager:
     def __init__(self):
         """Initialize the EnvironmentManager"""
         self.config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
-        self.…
-        self.draft_env_path = self.config.get_config_value('paths', 'drafts_env_path')
+        self.env_base_path = self.config.get_config_value('paths', 'env_path')
         self.env_logs_path = self.config.get_config_value('paths', 'env_logs_path')
+        self.env_version_path = self.config.get_config_value('paths', 'env_versions_path')
+        os.makedirs(self.env_version_path, exist_ok=True)
+        self.logger = CustomLogger().get_logger(__name__)
 
-    async def create_env(self, env_name, py_version,…
+    async def create_env(self, env_name, py_version, pip_libraries, conda_libraries, status, env_version='1', user_name=None, db:Session=None):
         """
         Creates a conda environment with specified Python version and packages.
 

@@ -26,47 +29,56 @@ class EnvironmentManager:
             db (Session): Database session (optional, will create if None)
 
         Returns:
-            str: Build status ('success' or '…
+            str: Build status ('success' or 'failed')
         """
         # Set up logging
         log_file_location = None
         if db:
             log_file_location = self._setup_logging(env_name, env_version, user_name, db)
 
+        # Create the conda environment YAML file
+        yaml_path = os.path.join(self.env_version_path, f"{env_name}_v{env_version}.yaml")
+        self.create_conda_yaml(
+            yaml_path=yaml_path,
+            env_name=env_name,
+            python_version=py_version,
+            conda_packages=conda_libraries,
+            pip_packages=pip_libraries
+        )
+
         if status == "published":
             return await self._execute_env_operation(
                 env_name=env_name,
-                py_version=py_version,
-                py_requirements=py_requirements,
                 status="published",
-… (old lines 42-43)
+                mode="create",
+                yaml_file_path=yaml_path,
+                version=int(env_version)
             )
         elif status == "draft":
-…
+            mode = "create" if env_version == '1' else "update"
            build_status = await self._execute_env_operation(
                 env_name=env_name,
-                py_version=py_version,
-                py_requirements=py_requirements,
                 status=status,
-…
+                mode=mode,
+                yaml_file_path=yaml_path,
                 log_file_location=log_file_location,
-…
+                version=int(env_version)
             )
 
             # Update job log status if db was provided
             if db and log_file_location:
                 log_file_name = os.path.basename(log_file_location)
                 await self._update_job_status(log_file_name, build_status, log_file_location, db)
-…
-            self.update_environment_db(env_name, env_version,…
+            pip_libraries, conda_libraries = self.update_library_versions(yaml_path)
+            self.update_environment_db(env_name, env_version, pip_libraries, conda_libraries, build_status, db)
 
             return build_status
 
         else:
+            self.logger.error(f"Invalid status '{status}' provided for environment creation.")
             raise ValueError("Invalid status. Use 'draft' or 'published'.")
 
-    async def clone_env(self, source_path,…
+    async def clone_env(self, source_path, env_name, pip_libraries, conda_libraries, user_name, db=None):
         """
         Clones an existing conda environment.
 

@@ -79,56 +91,95 @@ class EnvironmentManager:
             db (Session): Database session (optional, will create if None)
 
         Returns:
-            str: Build status ('success' or '…
+            str: Build status ('success' or 'failed')
         """
         # Set up logging
         log_file_location = None
         if db:
-            log_file_location = self._setup_logging(…
+            log_file_location = self._setup_logging(env_name, "1", user_name, db)
+
+        yaml_path = f"{self.env_version_path}/{env_name}_v1.yaml"
 
         # Perform the clone operation
         clone_status = await self._execute_env_operation(
-            env_name=…
-            source_path=source_path,
+            env_name=env_name,
             status="draft",
-…
+            mode="clone",
+            yaml_file_path=yaml_path,
+            source_path=source_path,
             log_file_location=log_file_location,
-…
+            version=1
         )
 
         # Update job log status if db was provided
         if db and log_file_location:
             log_file_name = os.path.basename(log_file_location)
             await self._update_job_status(log_file_name, clone_status, log_file_location, db)
-        self.update_environment_db(…
+        self.update_environment_db(
+            env_short_name=env_name,
+            version="1",
+            pip_libraries=pip_libraries,
+            conda_libraries=conda_libraries,
+            status=clone_status,
+            db=db
+        )
 
         return clone_status
 
-    async def…
+    async def revert_env(self, env_name, curr_version, revert_version, new_version, user_name, db: Session):
         """
-…
+        Reverts an environment to a previous version.
 
         Args:
             env_name (str): Name of the environment
-… (old lines 113-114)
-            status (str): Environment status ('draft' or 'published')
-            env_version (str): Version of the environment (for draft environments)
-            user_name (str): Username who initiated the creation
+            version (str): Version to revert to
+            db (Session): Database session
 
         Returns:
-            str: Build status ('success' or '…
+            str: Build status ('success' or 'failed')
         """
-… (old lines 122-129)
+        try:
+            # Get the YAML file for the specified version
+            old_yaml_path = f"{self.env_version_path}/{env_name}_v{revert_version}.yaml"
+            new_yaml_path = f"{self.env_version_path}/{env_name}_v{new_version}.yaml"
+            if not os.path.exists(old_yaml_path):
+                raise FileNotFoundError(f"YAML file for version {revert_version} does not exist.")
+
+            os.symlink(old_yaml_path, new_yaml_path)
+
+            log_file_location = None
+            if db:
+                log_file_location = self._setup_logging(env_name, new_version, user_name, db)
+
+            # Execute the revert operation
+            revert_status = await self._execute_env_operation(
+                env_name=env_name,
+                status="draft",
+                mode="update",
+                yaml_file_path=new_yaml_path,
+                log_file_location=log_file_location,
+                version=int(new_version)
+            )
+
+            log_file_name = os.path.basename(log_file_location)
+            await self._update_job_status(log_file_name, revert_status, log_file_location, db)
+            pip_libraries, conda_libraries = self.update_library_versions(new_yaml_path)
+            self.update_environment_db(env_name, new_version, pip_libraries, conda_libraries, revert_status, db)
 
-…
+        except Exception as e:
+            self.logger.error(f"Failed to revert environment {env_name}: {e}")
+            return "failed"
+
+    async def _execute_env_operation(
+        self,
+        env_name: str,
+        status: str,
+        mode: str,
+        yaml_file_path: str,
+        version: int,
+        source_path=None,
+        log_file_location=None,
+    ):
         """
         Executes environment operations (create or clone).
 

@@ -143,34 +194,32 @@ class EnvironmentManager:
             log_file_location (str): Path to log file
 
         Returns:
-            str: Build status ('success' or '…
+            str: Build status ('success' or 'failed')
         """
+        self.logger.info(f"Executing environment operation: {env_name}, Status: {status}, Mode: {mode}")
         status = status.lower()
-…
-            env_base_path = self.config.get_config_value('paths', 'published_env_path')
-            conda_env_path = os.path.join(env_base_path, env_name)
-        else:
-            env_base_path = self.config.get_config_value('paths', 'drafts_env_path')
-            conda_env_path = os.path.join(env_base_path, env_name, f"{env_name}_v{env_version}")
+        conda_env_path = os.path.join(self.env_base_path, env_name)
 
         try:
-            if…
-…
+            if os.path.exists(conda_env_path) and mode == "create":
+                raise FileExistsError(f"Environment '{env_name}' already exists at {conda_env_path}.")
+
+            os.makedirs(conda_env_path, exist_ok=True)
 
-            if mode == "create":
-                # Convert requirements list to comma-separated string
-                if isinstance(py_requirements, list):
-                    py_requirements = ",".join(py_requirements)
-…
+            if mode == "create":
                 create_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
-                command = ["bash", create_env_script_path,…
-…
+                command = ["bash", create_env_script_path, yaml_file_path, conda_env_path]
+
+            elif mode == "update":
+                update_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/update_environment.sh')
+                command = ["bash", update_env_script_path, yaml_file_path, conda_env_path]
+
             elif mode == "clone":
                 clone_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/clone_environment.sh')
                 command = ["bash", clone_env_script_path, source_path, conda_env_path]
 
             else:
-                raise ValueError("Invalid mode. Use 'create' or 'clone'.")
+                raise ValueError("Invalid mode. Use 'create', 'update', or 'clone'.")
 
             process = await asyncio.create_subprocess_exec(
                 *command,

@@ -184,6 +233,16 @@ class EnvironmentManager:
             with open(log_file_location, "a") as log_file:
                 success_detected = False
                 try:
+                    # Write an initial log entry to indicate the operation has started
+                    start_message = {
+                        "timestamp": self.format_timestamp(),
+                        "type": "log",
+                        "content": f"Starting environment {mode} operation for {env_name}"
+                    }
+                    log_file.write(json.dumps(start_message) + "\n")
+                    log_file.flush()
+
+                    # Process stdout line by line
                     while True:
                         line = await process.stdout.readline()
                         if not line:

@@ -213,7 +272,7 @@ class EnvironmentManager:
                             }
                             log_file.write(json.dumps(error_message_dict) + "\n")
 
-                    final_build_status = "…
+                    final_build_status = "failed" if process.returncode != 0 else "success"
 
                 except asyncio.CancelledError:
                     process.kill()

@@ -224,27 +283,24 @@ class EnvironmentManager:
                        "content": msg_content
                    }
                    log_file.write(json.dumps(cancellation_message) + "\n")
-                    final_build_status = "…
+                    final_build_status = "failed"
 
                 finally:
-                    if final_build_status…
-                        symlink_path = os.path.join(env_base_path, env_name, "default")
-                        self.update_symlink(symlink_path, conda_env_path)
-                    elif final_build_status != "success":
+                    if final_build_status != "success" and version == 1:
                         if os.path.exists(conda_env_path):
                             shutil.rmtree(conda_env_path)
 
                     return final_build_status
 
        except OSError as e:
-…
-            return "…
+            self.logger.error(f"OS error while operating on {conda_env_path}: {e}")
+            return "failed"
        except subprocess.CalledProcessError as e:
-…
-            return "…
+            self.logger.error(f"Subprocess error during environment operation: {e}")
+            return "failed"
        except Exception as e:
-…
-            return "…
+            self.logger.error(f"Unexpected error during environment operation for {env_name}: {e}")
+            return "failed"
 
     def _setup_logging(self, env_name: str, env_version: str, user_name: str, db: Session):
         """

@@ -281,7 +337,7 @@ class EnvironmentManager:
         Args:
             db (Session): Database session
             log_file_name (str): Name of the log file
-            build_status (str): Build status ('success' or '…
+            build_status (str): Build status ('success' or 'failed')
             log_file_location (str): Path to the log file
         """
         attempts = 3

@@ -306,7 +362,7 @@ class EnvironmentManager:
                if attempts > 0:
                    await asyncio.sleep(retry_delay)
                else:
-…
+                    self.logger.error(f"Failed to update job log after multiple attempts: {e}")
 
     def create_job_entry(self, user_name: str, db: Session, log_file_name: str, log_file_location: str):
         """

@@ -325,8 +381,9 @@ class EnvironmentManager:
 
         if job:
             if job.status == "success":
+                self.logger.error(f"Job with log_file_name '{log_file_name}' already completed successfully.")
                 raise ValueError(f"Job with log_file_name '{log_file_name}' already completed successfully.")
-            if job.status == "…
+            if job.status == "failed":
                 job.created_at = datetime.datetime.now()
                 job.status = "in_progress"
             else:

@@ -349,7 +406,7 @@ class EnvironmentManager:
         Args:
             db (Session): Database session
             log_file_name (str): Name of the log file
-            final_build_status (str): Final status of the build ('success' or '…
+            final_build_status (str): Final status of the build ('success' or 'failed')
         """
         try:
             job_record = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()

@@ -358,24 +415,12 @@ class EnvironmentManager:
                 job_record.status = final_build_status
                 db.commit()
             else:
+                self.logger.error(f"No job log found for file: {log_file_name}")
                 raise ValueError(f"No job log found for file: {log_file_name}")
         except Exception as e:
+            self.logger.error(f"Failed to update job log for {log_file_name}: {e}")
             db.rollback()
             raise
-
-    def update_symlink(self, symlink_path, conda_env_path):
-        """
-        Creates or updates the symlink to point to the default version.
-        """
-        symlink_dir = os.path.dirname(symlink_path)
-        if not os.path.exists(symlink_dir):
-            os.makedirs(symlink_dir, exist_ok=True)
-
-        # If symlink exists, remove it before updating
-        if os.path.islink(symlink_path):
-            os.remove(symlink_path)
-
-        subprocess.run(["ln", "-sf", conda_env_path, symlink_path], check=True)
 
     def format_timestamp(self):
         """

@@ -386,73 +431,214 @@ class EnvironmentManager:
         """
         return datetime.datetime.now().strftime("%b %d %I:%M:%S %p")
 
-    def update_environment_db(self, env_short_name, version,…
+    def update_environment_db(self, env_short_name, version, pip_libraries, conda_libraries, status, db: Session):
         """
         Updates the environment table with the new version and libraries.
         """
         try:
-            if isinstance(…
-…
+            if isinstance(pip_libraries, list):
+                pip_libraries = ", ".join(pip_libraries)
+            if isinstance(conda_libraries, list):
+                conda_libraries = ", ".join(conda_libraries)
             current_env = db.query(Environment).filter(Environment.short_name == env_short_name).first()
-… (old lines 397-398)
+            if not current_env:
+                raise ValueError(f"Environment with short name '{env_short_name}' does not exist.")
+
+            env_status = "Draft" if status == "success" else "Failed"
+
+            db.query(Environment).filter(
+                Environment.short_name == env_short_name
+            ).update({"version": version, "pip_libraries": pip_libraries, "conda_libraries": conda_libraries, "status": env_status})
             db.commit()
 
         except Exception as e:
+            self.logger.error(f"Failed to update environment {env_short_name} in database: {e}")
             db.rollback()
             raise
 
-…
-    def update_library_versions(self, libraries: list, conda_env_path: str) -> list:
+    def update_library_versions(self, yaml_path: str):
         """
-        Updates libraries without version specifications by getting their actual installed versions.
+        Updates libraries without version specifications by getting their actual installed versions from a conda YAML file.
 
         Args:
-…
-            conda_env_path (str): Path to the conda environment where libraries are installed.
+            yaml_path (str): Path to the conda environment YAML file.
 
         Returns:
-…
+            tuple: Updated lists of (pip_libraries, conda_libraries) with version specifications.
         """
         try:
-… (old lines 418-425)
+            # Define default conda packages to ignore
+            default_conda_packages = {
+                "_libgcc_mutex", "_openmp_mutex", "bzip2", "ca-certificates",
+                "ld_impl_linux-64", "libexpat", "libffi", "libgcc", "libgcc-ng",
+                "libgomp", "liblzma", "libnsl", "libsqlite", "libuuid", "libxcrypt",
+                "libzlib", "ncurses", "openssl", "readline", "setuptools", "tk",
+                "tzdata", "wheel", "libstdcxx-ng", "python"
+            }
 
-… (old lines 427-429)
-                    lib_name, version = line.split("==", 1)
-                    installed_versions[lib_name.lower()] = version
+            # Read the YAML file
+            with open(yaml_path, 'r') as f:
+                yaml_content = yaml.safe_load(f)
 
-#…
-… (old lines 434-442)
+            # Extract conda and pip dependencies
+            dependencies = yaml_content.get('dependencies', [])
+
+            # Process conda libraries
+            conda_libraries = []
+            pip_libraries = []
+
+            for dep in dependencies:
+                if isinstance(dep, str):
+                    if dep.startswith("python="):
+                        continue
 
-… (old lines 444-445)
-                    else:
-                        updated_libraries.append(lib)
-                else:
-                    updated_libraries.append(lib)
+                    parts = dep.split('=')
+                    package_name = parts[0].strip()
 
-…
+                    if package_name.lower() not in default_conda_packages:
+                        if len(parts) >= 2:
+                            package_with_version = f"{package_name}={parts[1]}"
+                            conda_libraries.append(package_with_version)
+                        else:
+                            # No version specified, keep as is
+                            conda_libraries.append(dep)
+
+                elif isinstance(dep, dict) and 'pip' in dep:
+                    # This is the pip section
+                    for pip_pkg in dep['pip']:
+                        pip_libraries.append(pip_pkg)
+
+            return pip_libraries, conda_libraries
+
+        except Exception as e:
+            self.logger.error(f"Error reading YAML file and extracting libraries: {str(e)}")
+            return [], []
 
         except subprocess.CalledProcessError as e:
-…
-            return…
+            self.logger.error(f"Error running pip freeze: {e.stderr}")
+            return pip_libraries
+        except Exception as e:
+            self.logger.error(f"Error updating library versions: {str(e)}")
+            return pip_libraries
+
+    def create_conda_yaml(self, yaml_path, env_name, python_version, conda_packages, pip_packages):
+        """
+        Creates a conda environment YAML file with specified packages and channels.
+
+        Args:
+            yaml_path (str): Path where to save the YAML file
+            env_name (str): Name of the conda environment
+            python_version (str): Python version to use
+            conda_channels (list): List of conda channels
+            conda_packages (list): List of conda packages to install
+            pip_packages (list): List of pip packages to install
+        """
+        try:
+            # Create the environment specification
+            env_spec = {
+                "name": env_name,
+                "channels": ["conda-forge", "defaults"],
+                "dependencies": [
+                    f"python={python_version}"
+                ]
+            }
+
+            # Add conda packages
+            if conda_packages and len(conda_packages) > 0:
+                env_spec["dependencies"].extend(conda_packages)
+
+            pip_pattern = re.compile(r"^pip([=]{1,2}.*)?$")  # matches pip, pip=..., pip==...
+            pip_found = any(pip_pattern.match(pkg.strip()) for pkg in conda_packages)
+
+            # if pip is not already included in conda packages, add it
+            if not pip_found:
+                env_spec["dependencies"].append("pip")
+
+            # Add pip packages if any
+            if pip_packages and len(pip_packages) > 0:
+                pip_section = {
+                    "pip": pip_packages
+                }
+                env_spec["dependencies"].append(pip_section)
+
+            with open(yaml_path, 'w') as yaml_file:
+                yaml.dump(env_spec, yaml_file, default_flow_style=False)
+
+            return yaml_path
+
         except Exception as e:
-…
-…
+            self.logger.error(f"Failed to create conda environment YAML file: {str(e)}")
+            raise Exception(f"Failed to create conda environment YAML file: {str(e)}")
+
+    def format_py_requirements(self, env):
+        """
+        Format pip and conda libraries into a standardized list of dictionaries
+        sorted alphabetically by library name.
+
+        Args:
+            env: Environment object containing pip_libraries and conda_libraries strings
+
+        Returns:
+            list: List of dictionaries with format [{"name":"lib_name", "version":"version", "manager":"pip|conda"}, ...]
+        """
+        py_requirements = []
+
+        # process libraries, handle both '==' and '=' version specifications
+        if env.pip_libraries:
+            for lib in env.pip_libraries.split(','):
+                lib = lib.strip()
+                if not lib:
+                    continue
+
+                if '==' in lib:
+                    name, version = lib.split('==', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "pip"
+                    })
+                elif '=' in lib:
+                    name, version = lib.split('=', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "pip"
+                    })
+                else:
+                    py_requirements.append({
+                        "name": lib,
+                        "version": "",
+                        "manager": "pip"
+                    })
+
+        if env.conda_libraries:
+            for lib in env.conda_libraries.split(','):
+                lib = lib.strip()
+                if not lib:
+                    continue
+
+                if '==' in lib:
+                    name, version = lib.split('==', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "conda"
+                    })
+                elif '=' in lib:
+                    name, version = lib.split('=', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "conda"
+                    })
+                else:
+                    py_requirements.append({
+                        "name": lib,
+                        "version": "",
+                        "manager": "conda"
+                    })
+
+        # sort the requirements list alphabetically by name
+        py_requirements.sort(key=lambda x: x["name"].lower())
+
+        return py_requirements
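To make the new YAML-driven build flow concrete, here is a small self-contained sketch of the env_spec structure that create_conda_yaml assembles before writing it out. Only the dict shape is taken from the diff; the package names, versions, and printed output are invented for illustration.

    # Standalone sketch of the env_spec dict built by create_conda_yaml above.
    # The inputs are invented examples; the structure mirrors the diff.
    import yaml

    env_spec = {
        "name": "demo",
        "channels": ["conda-forge", "defaults"],
        "dependencies": [
            "python=3.11",                   # always first
            "numpy=1.26",                    # conda packages use a single '='
            "pip",                           # appended when no pip spec is present
            {"pip": ["requests==2.31.0"]},   # pip packages as a nested mapping
        ],
    }

    print(yaml.dump(env_spec, default_flow_style=False))
    # The "dependencies" section comes out as:
    #   dependencies:
    #   - python=3.11
    #   - numpy=1.26
    #   - pip
    #   - pip:
    #     - requests==2.31.0

After the build, create_environment.sh re-exports the realized environment over this same file, and update_library_versions reads it back to recover pinned versions, filtering out the base packages listed in default_conda_packages.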
dataflow/models/environment.py
CHANGED

@@ -15,22 +15,21 @@ class EnvironmentAttributes(Base):
     enabled = Column(Boolean, default=True)
     version = Column(String, default=0)
     is_latest = Column(Boolean, default=True)
-…
+    base_env_id = Column(Integer, default=None)
     short_name = Column(String(5))
     status = Column(String, default="Saved")
     icon = Column(String)
     py_version = Column(String)
     r_version = Column(String)
-…
+    pip_libraries = Column(Text)
+    conda_libraries = Column(Text)
     r_requirements = Column(Text)
-    py_requirements_compiled = Column(Text)
-    r_requirements_compiled = Column(Text)
     created_date = Column(DateTime, server_default=func.now())
     created_by = Column(String)
 
 
 
-class Environment(EnvironmentAttributes):
+class Environment(EnvironmentAttributes):
     __tablename__ = 'ENVIRONMENT'
 
     id = Column(Integer, primary_key=True, autoincrement=True)

@@ -43,7 +42,7 @@ class ArchivedEnvironment(EnvironmentAttributes):
     __tablename__ = 'ARCHIVED_ENVIRONMENT'
 
     id = Column(Integer, primary_key=True, autoincrement=True)
-    original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id'))
+    original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'))
     is_latest = Column(Boolean, default=False)
 
     # Relationship with Environment

dataflow/models/environment_status.py
CHANGED

@@ -10,7 +10,7 @@ class EnvironmentStatus(Base):
 
    __tablename__='ENVIRONMENT_STATUS'
 
-    id = Column(Integer, ForeignKey('ENVIRONMENT.id'), primary_key=True, nullable=False)
+    id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'), primary_key=True, nullable=False)
     status = Column(String, nullable=False)
     comment = Column(String)
     status_changed_date = Column(DateTime, server_default=func.now(), nullable=False)
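The model change above replaces the compiled-requirements columns with two comma-separated Text columns, pip_libraries and conda_libraries; format_py_requirements in dataflow/environment.py parses them back into a sorted list of dicts. An invented example of that round trip:

    # Invented example of the round trip between the Text columns above and
    # the structure produced by format_py_requirements (dataflow/environment.py).
    from types import SimpleNamespace

    env = SimpleNamespace(
        pip_libraries="pandas==2.2.1, requests",
        conda_libraries="numpy=1.26.4",
    )

    # Per the parsing rules in the diff ('==' is checked before '='), the
    # result, sorted case-insensitively by name, would be:
    # [
    #     {"name": "numpy",    "version": "1.26.4", "manager": "conda"},
    #     {"name": "pandas",   "version": "2.2.1",  "manager": "pip"},
    #     {"name": "requests", "version": "",       "manager": "pip"},
    # ]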
dataflow/models/role.py
CHANGED

@@ -13,7 +13,7 @@ class Role(Base):
     id = Column(Integer, primary_key=True, index=True, autoincrement=True, nullable=False)
     name = Column(String, unique=True, nullable=False)
     description = Column(String, nullable=True)
-    base_role = Column(Enum('admin', 'user', name='base_role_field'), default='user', nullable=False)
+    base_role = Column(Enum('admin', 'user', 'applicant', name='base_role_field'), default='user', nullable=False)
 
     users = relationship("User", back_populates="role_details", cascade="all, delete-orphan")
     role_server_assocs = relationship("RoleServer", back_populates="role")
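A deployment note on this one-line change: adding 'applicant' to the Enum only changes the model, since SQLAlchemy does not alter an existing database type in place. If the column is backed by a native PostgreSQL enum, which the diff does not confirm, a hand-written migration would be needed, for example with Alembic:

    # Hypothetical Alembic migration, assuming 'base_role_field' is a native
    # PostgreSQL enum type; this package may manage its schema differently.
    from alembic import op

    def upgrade():
        # Before PostgreSQL 12 this statement cannot run inside a transaction block.
        op.execute("ALTER TYPE base_role_field ADD VALUE IF NOT EXISTS 'applicant'")

    def downgrade():
        # PostgreSQL cannot drop an enum value; reverting would mean rebuilding the type.
        pass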
dataflow/scripts/create_environment.sh
CHANGED

@@ -1,17 +1,29 @@
 #!/bin/bash
+# filepath: /home/hari/dbo/dataflow-core/dataflow/scripts/create_environment.sh
 set -e
 
-…
+# Accept new parameters
+yaml_file_path=$1
 conda_env_path=$2
-…
+
+# Validate inputs
+if [ -z "$yaml_file_path" ] || [ -z "$conda_env_path" ]; then
+    echo "Error: Missing required parameters"
+    exit 1
+fi
+
+if [ ! -f "$yaml_file_path" ]; then
+    echo "Error: YAML file does not exist: $yaml_file_path"
+    exit 1
+fi
 
 # Use an isolated conda package cache to avoid concurrency issues
 export CONDA_PKGS_DIRS=$(mktemp -d)
+
 # to delete conda package cache after script finishes
 trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
 
-#…
-conda create --prefix $…
+# Create the conda environment from the YAML file
+conda env create --file "$yaml_file_path" --prefix "$conda_env_path" --yes
 
-…
-${conda_env_path}/bin/pip install --root-user-action ignore ${libraries[@]}
+conda env export --prefix "$conda_env_path" > "$yaml_file_path"

dataflow/scripts/update_environment.sh
ADDED

@@ -0,0 +1,36 @@
+#!/bin/bash
+# filepath: /home/hari/dbo/dataflow-core/dataflow/scripts/update_environment.sh
+set -e
+
+# Accept parameters
+yaml_file_path=$1
+conda_env_path=$2
+
+# Validate inputs
+if [ -z "$yaml_file_path" ] || [ -z "$conda_env_path" ]; then
+    echo "Error: Missing required parameters"
+    exit 1
+fi
+
+if [ ! -f "$yaml_file_path" ]; then
+    echo "Error: YAML file does not exist: $yaml_file_path"
+    exit 1
+fi
+
+if [ ! -d "$conda_env_path" ]; then
+    echo "Error: Conda environment does not exist at: $conda_env_path"
+    exit 1
+fi
+
+# Use an isolated conda package cache to avoid concurrency issues
+export CONDA_PKGS_DIRS=$(mktemp -d)
+
+# to delete conda package cache after script finishes
+trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
+
+# Update the conda environment using the YAML file
+conda env update --prefix "$conda_env_path" --file "$yaml_file_path" --prune
+
+if [ ! -L "$yaml_file_path" ]; then
+    conda env export --prefix "$conda_env_path" > "$yaml_file_path"
+fi
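Both scripts take the same two positional arguments, the YAML spec and the environment prefix, and are launched by _execute_env_operation through asyncio. A minimal sketch of that call pattern; the paths are invented and the stderr handling is an assumption, not taken from the diff:

    # Minimal sketch of how _execute_env_operation drives the scripts above.
    # Argument order (YAML file, then env prefix) matches both scripts;
    # the concrete paths and stderr handling are assumptions.
    import asyncio
    import pkg_resources

    async def run_update(yaml_file_path: str, conda_env_path: str) -> int:
        script = pkg_resources.resource_filename("dataflow", "scripts/update_environment.sh")
        process = await asyncio.create_subprocess_exec(
            "bash", script, yaml_file_path, conda_env_path,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        # The real implementation streams process.stdout line by line into a
        # JSON job log; here we simply wait for completion.
        await process.wait()
        return process.returncode

    # Example (invented paths):
    # status = asyncio.run(run_update("/envs/versions/demo_v2.yaml", "/envs/demo"))

The trailing [ ! -L ... ] guard in update_environment.sh appears to protect revert_env: that method symlinks the new version's YAML to an old one, and skipping the conda env export for symlinks keeps the historical spec file from being overwritten.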
{dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/RECORD
CHANGED

@@ -1,25 +1,25 @@
 authenticator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 authenticator/dataflowairflowauthenticator.py,sha256=gEdCiL2yJQ7lYvAwbrjcAkccVMfehoMJldw9eU7cc2s,2243
-authenticator/dataflowhubauthenticator.py,sha256…
+authenticator/dataflowhubauthenticator.py,sha256=-wFEPEQfgCgtghC0Eo9e18B-OU1JsPKam0tbQaYCg1s,13563
 authenticator/dataflowsupersetauthenticator.py,sha256=NkAmDaIc-ui-qEolu4xz_UY7P_2g8111hwNjPvAOW1Q,2839
 dataflow/__init__.py,sha256=WTRg8HMpMWSgxYJ9ZGVldx4k07fAbta3mBmZ1hG9mWE,30
 dataflow/configuration.py,sha256=7To6XwH1eESiYp39eqPcswXWwrdBUdPF6xN6WnazOF0,663
 dataflow/database_manager.py,sha256=tJHMuOZ9Muskrh9t4uLRlTuFU0VkHAzoHlGP5DORIC4,899
 dataflow/dataflow.py,sha256=-UYZst7EO1GgaOjlAkKu-tu7RC6XsgadGeDp1MOvZiA,7439
 dataflow/db.py,sha256=5UwE4w5Vn9RqFIhr8ARlu2haZX-PtHDLRPjmn5BG2m8,1649
-dataflow/environment.py,sha256=…
+dataflow/environment.py,sha256=eAWYhQFM7CA3o-CA3qTWbkE8ZHZ-Jo2qvSkDQ4dKqmY,27238
 dataflow/models/__init__.py,sha256=QMLiKj8BMhfScWMm8kgHkMjwAlFeg5Cym3_AI1NvBUA,783
 dataflow/models/app_types.py,sha256=yE_ZB13lhpK7AZ7PyBwnQlf0RlIHYs_-vdMKx7_RMlY,379
 dataflow/models/blacklist_library.py,sha256=B2oi3Z8GcR_glhLAyinFk0W8c9txXvm3uOER6dY-q7I,991
 dataflow/models/connection.py,sha256=_VJL3KuIrm8t4lJmtunIL3-AXF9Yvi5wUolzdR3tE0E,1017
-dataflow/models/environment.py,sha256=…
-dataflow/models/environment_status.py,sha256=…
+dataflow/models/environment.py,sha256=Vg-4vQe_cHIfOX-kPJWd6SIEapPJcgxoCITgdRrqT_o,2107
+dataflow/models/environment_status.py,sha256=lvPDNUsUoTW9D97B07aKqJQHRKp4LvPM28pQDMPH1ac,536
 dataflow/models/git_ssh.py,sha256=W15SDypxzGOz_aZkHEnVZ6DIMVsjAsbSIXVIEt2mPYU,694
 dataflow/models/pinned_projects.py,sha256=rkpPX_f2U9HjmrRo7_K8rnZIeXuQKGq6hYTrtLmu21c,566
 dataflow/models/project_details.py,sha256=94wTygXv9iGB0w8g_6vtkB5ZqIzpEv1W9uWwCA4hM0Y,1078
 dataflow/models/recent_project_studio.py,sha256=m12KGCsv453C1ijHjfVD8E7cJ7Og_0N8uc7_9VlfkYw,812
 dataflow/models/recent_projects.py,sha256=QqDlk3ll7tBaQl5hqvRarlB9_SUBuN44muLIuTVbPe0,301
-dataflow/models/role.py,sha256=…
+dataflow/models/role.py,sha256=_I5F2TFox_k2-LGgjVuO2PIW9-gpwpoX8Te7m024A8k,693
 dataflow/models/role_server.py,sha256=mMcfjsGX1cY8hOAOBBmrZgw8ozdfuvjKJoBlR6F0Kdc,689
 dataflow/models/runtime.py,sha256=OiuBfZTMg81U10GS00DxfhiAmHlcyQUw5LBR8RaPl7s,415
 dataflow/models/server_config.py,sha256=GTMtQfgtuvKUbxV16VhEpKGhYoNISFLRWdUPqBJmYbM,1365

@@ -29,15 +29,16 @@ dataflow/models/user.py,sha256=PT-zwZj7NWUubIj_7EY2EsjduMbI_42EyMclWMLESGk,1073
 dataflow/models/user_environment.py,sha256=yI9NutULcLiwlycuEin6ROe6o1Sjdv_sgw2MEkJFeYg,568
 dataflow/models/user_team.py,sha256=r_fmKvf6JuGgiiI9TXWjVG2QZ3WOvDrOwYWVQ3r8oWo,659
 dataflow/models/variables.py,sha256=Sinvv3zFYni5i_GrL69cVfhCh4tOOaIHiEzWYRJ-i10,1132
-dataflow/scripts/clone_environment.sh,sha256=…
-dataflow/scripts/create_environment.sh,sha256=…
+dataflow/scripts/clone_environment.sh,sha256=dVs-NAGHtpYsk-OjoZ_gbYuZZoi3jIbEp_zXbDXEVbc,455
+dataflow/scripts/create_environment.sh,sha256=TLJ7FKYyhsLe0bqBy74FnpuvjFTSHiGXp1iLMcOaeJA,798
+dataflow/scripts/update_environment.sh,sha256=p8r2qV4blqLyC7eksHSkUDoXx_UL4Xc4NWmx8y0h_rc,962
 dataflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataflow/utils/aws_secrets_manager.py,sha256=A_fNs9VNah9dDdl9NhqizJamYU7xr2v_GXlw9InEDFk,2380
 dataflow/utils/get_current_user.py,sha256=akjcUyTpmMdAZj9LFGSTs76hjBRjltNk9hLUqC_BdkA,1140
 dataflow/utils/json_handler.py,sha256=5_7WdypegRBDe2HSqBXyrJAdd92wsha8qRcmQvCj1TA,782
 dataflow/utils/logger.py,sha256=7BFrOq5Oiqn8P4XZbgJzMP5O07d2fpdECbbfsjrUuHw,1213
-dataflow_core-2.1.…
-dataflow_core-2.1.…
-dataflow_core-2.1.…
-dataflow_core-2.1.…
-dataflow_core-2.1.…
+dataflow_core-2.1.3.dist-info/METADATA,sha256=dRhOgxGmyMCLl9eJQ0XfExl5Fp0BYZzdJfYGgvVS3Mo,301
+dataflow_core-2.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dataflow_core-2.1.3.dist-info/entry_points.txt,sha256=ppj_EIbYrJJwCPg1kfdsZk5q1N-Ejfis1neYrnjhO8o,117
+dataflow_core-2.1.3.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
+dataflow_core-2.1.3.dist-info/RECORD,,
{dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/WHEEL
File without changes

{dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/entry_points.txt
File without changes

{dataflow_core-2.1.1.dist-info → dataflow_core-2.1.3.dist-info}/top_level.txt
File without changes