dataflow-core 2.1.2__tar.gz → 2.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dataflow-core might be problematic. Click here for more details.

Files changed (51) hide show
  1. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/PKG-INFO +1 -1
  2. dataflow_core-2.1.3/dataflow/environment.py +644 -0
  3. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/environment.py +5 -6
  4. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/environment_status.py +1 -1
  5. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/scripts/clone_environment.sh +2 -0
  6. dataflow_core-2.1.3/dataflow/scripts/create_environment.sh +29 -0
  7. dataflow_core-2.1.3/dataflow/scripts/update_environment.sh +36 -0
  8. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow_core.egg-info/PKG-INFO +1 -1
  9. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow_core.egg-info/SOURCES.txt +1 -0
  10. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/setup.py +1 -1
  11. dataflow_core-2.1.2/dataflow/environment.py +0 -458
  12. dataflow_core-2.1.2/dataflow/scripts/create_environment.sh +0 -17
  13. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/README.md +0 -0
  14. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/authenticator/__init__.py +0 -0
  15. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/authenticator/dataflowairflowauthenticator.py +0 -0
  16. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/authenticator/dataflowhubauthenticator.py +0 -0
  17. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/authenticator/dataflowsupersetauthenticator.py +0 -0
  18. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/__init__.py +0 -0
  19. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/configuration.py +0 -0
  20. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/database_manager.py +0 -0
  21. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/dataflow.py +0 -0
  22. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/db.py +0 -0
  23. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/__init__.py +0 -0
  24. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/app_types.py +0 -0
  25. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/blacklist_library.py +0 -0
  26. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/connection.py +0 -0
  27. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/git_ssh.py +0 -0
  28. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/pinned_projects.py +0 -0
  29. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/project_details.py +0 -0
  30. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/recent_project_studio.py +0 -0
  31. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/recent_projects.py +0 -0
  32. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/role.py +0 -0
  33. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/role_server.py +0 -0
  34. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/runtime.py +0 -0
  35. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/server_config.py +0 -0
  36. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/session.py +0 -0
  37. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/team.py +0 -0
  38. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/user.py +0 -0
  39. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/user_environment.py +0 -0
  40. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/user_team.py +0 -0
  41. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/models/variables.py +0 -0
  42. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/utils/__init__.py +0 -0
  43. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/utils/aws_secrets_manager.py +0 -0
  44. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/utils/get_current_user.py +0 -0
  45. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/utils/json_handler.py +0 -0
  46. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow/utils/logger.py +0 -0
  47. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow_core.egg-info/dependency_links.txt +0 -0
  48. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow_core.egg-info/entry_points.txt +0 -0
  49. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow_core.egg-info/requires.txt +0 -0
  50. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/dataflow_core.egg-info/top_level.txt +0 -0
  51. {dataflow_core-2.1.2 → dataflow_core-2.1.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataflow-core
3
- Version: 2.1.2
3
+ Version: 2.1.3
4
4
  Summary: Dataflow core package
5
5
  Author: Dataflow
6
6
  Author-email:
@@ -0,0 +1,644 @@
1
+ import os, shutil, subprocess, datetime, yaml, re
2
+ from .models.environment import JobLogs, Environment
3
+ import json, asyncio, pkg_resources
4
+ from sqlalchemy.orm import Session
5
+ from .configuration import ConfigurationManager
6
+ from .utils.logger import CustomLogger
7
+
8
class EnvironmentManager:
    def __init__(self):
        """Set up configuration-driven paths and a logger for environment management."""
        # The central dataflow config file drives all filesystem locations.
        self.config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
        self.env_base_path = self.config.get_config_value('paths', 'env_path')
        self.env_logs_path = self.config.get_config_value('paths', 'env_logs_path')
        self.env_version_path = self.config.get_config_value('paths', 'env_versions_path')
        # Versioned YAML specs live here; create the directory eagerly.
        os.makedirs(self.env_version_path, exist_ok=True)
        self.logger = CustomLogger().get_logger(__name__)
17
+
18
+ async def create_env(self, env_name, py_version, pip_libraries, conda_libraries, status, env_version='1', user_name=None, db:Session=None):
19
+ """
20
+ Creates a conda environment with specified Python version and packages.
21
+
22
+ Args:
23
+ env_name (str): Name of the environment
24
+ py_version (str): Python version to use
25
+ py_requirements (list): List of packages to install
26
+ status (str): Environment status ('draft' or 'published')
27
+ env_version (str): Version of the environment (for draft environments)
28
+ user_name (str): Username who initiated the creation
29
+ db (Session): Database session (optional, will create if None)
30
+
31
+ Returns:
32
+ str: Build status ('success' or 'failed')
33
+ """
34
+ # Set up logging
35
+ log_file_location = None
36
+ if db:
37
+ log_file_location = self._setup_logging(env_name, env_version, user_name, db)
38
+
39
+ # Create the conda environment YAML file
40
+ yaml_path = os.path.join(self.env_version_path, f"{env_name}_v{env_version}.yaml")
41
+ self.create_conda_yaml(
42
+ yaml_path=yaml_path,
43
+ env_name=env_name,
44
+ python_version=py_version,
45
+ conda_packages=conda_libraries,
46
+ pip_packages=pip_libraries
47
+ )
48
+
49
+ if status == "published":
50
+ return await self._execute_env_operation(
51
+ env_name=env_name,
52
+ status="published",
53
+ mode="create",
54
+ yaml_file_path=yaml_path,
55
+ version=int(env_version)
56
+ )
57
+ elif status == "draft":
58
+ mode = "create" if env_version == '1' else "update"
59
+ build_status = await self._execute_env_operation(
60
+ env_name=env_name,
61
+ status=status,
62
+ mode=mode,
63
+ yaml_file_path=yaml_path,
64
+ log_file_location=log_file_location,
65
+ version=int(env_version)
66
+ )
67
+
68
+ # Update job log status if db was provided
69
+ if db and log_file_location:
70
+ log_file_name = os.path.basename(log_file_location)
71
+ await self._update_job_status(log_file_name, build_status, log_file_location, db)
72
+ pip_libraries, conda_libraries = self.update_library_versions(yaml_path)
73
+ self.update_environment_db(env_name, env_version, pip_libraries, conda_libraries, build_status, db)
74
+
75
+ return build_status
76
+
77
+ else:
78
+ self.logger.error(f"Invalid status '{status}' provided for environment creation.")
79
+ raise ValueError("Invalid status. Use 'draft' or 'published'.")
80
+
81
+ async def clone_env(self, source_path, env_name, pip_libraries, conda_libraries, user_name, db=None):
82
+ """
83
+ Clones an existing conda environment.
84
+
85
+ Args:
86
+ source_path (str): Path to source environment
87
+ target_name (str): Name for the target environment
88
+ status (str): Environment status ('draft' or 'published')
89
+ env_version (str): Version of the environment (for draft environments)
90
+ user_name (str): Username who initiated the clone
91
+ db (Session): Database session (optional, will create if None)
92
+
93
+ Returns:
94
+ str: Build status ('success' or 'failed')
95
+ """
96
+ # Set up logging
97
+ log_file_location = None
98
+ if db:
99
+ log_file_location = self._setup_logging(env_name, "1", user_name, db)
100
+
101
+ yaml_path = f"{self.env_version_path}/{env_name}_v1.yaml"
102
+
103
+ # Perform the clone operation
104
+ clone_status = await self._execute_env_operation(
105
+ env_name=env_name,
106
+ status="draft",
107
+ mode="clone",
108
+ yaml_file_path=yaml_path,
109
+ source_path=source_path,
110
+ log_file_location=log_file_location,
111
+ version=1
112
+ )
113
+
114
+ # Update job log status if db was provided
115
+ if db and log_file_location:
116
+ log_file_name = os.path.basename(log_file_location)
117
+ await self._update_job_status(log_file_name, clone_status, log_file_location, db)
118
+ self.update_environment_db(
119
+ env_short_name=env_name,
120
+ version="1",
121
+ pip_libraries=pip_libraries,
122
+ conda_libraries=conda_libraries,
123
+ status=clone_status,
124
+ db=db
125
+ )
126
+
127
+ return clone_status
128
+
129
+ async def revert_env(self, env_name, curr_version, revert_version, new_version, user_name, db: Session):
130
+ """
131
+ Reverts an environment to a previous version.
132
+
133
+ Args:
134
+ env_name (str): Name of the environment
135
+ version (str): Version to revert to
136
+ db (Session): Database session
137
+
138
+ Returns:
139
+ str: Build status ('success' or 'failed')
140
+ """
141
+ try:
142
+ # Get the YAML file for the specified version
143
+ old_yaml_path = f"{self.env_version_path}/{env_name}_v{revert_version}.yaml"
144
+ new_yaml_path = f"{self.env_version_path}/{env_name}_v{new_version}.yaml"
145
+ if not os.path.exists(old_yaml_path):
146
+ raise FileNotFoundError(f"YAML file for version {revert_version} does not exist.")
147
+
148
+ os.symlink(old_yaml_path, new_yaml_path)
149
+
150
+ log_file_location = None
151
+ if db:
152
+ log_file_location = self._setup_logging(env_name, new_version, user_name, db)
153
+
154
+ # Execute the revert operation
155
+ revert_status = await self._execute_env_operation(
156
+ env_name=env_name,
157
+ status="draft",
158
+ mode="update",
159
+ yaml_file_path=new_yaml_path,
160
+ log_file_location=log_file_location,
161
+ version=int(new_version)
162
+ )
163
+
164
+ log_file_name = os.path.basename(log_file_location)
165
+ await self._update_job_status(log_file_name, revert_status, log_file_location, db)
166
+ pip_libraries, conda_libraries = self.update_library_versions(new_yaml_path)
167
+ self.update_environment_db(env_name, new_version, pip_libraries, conda_libraries, revert_status, db)
168
+
169
+ except Exception as e:
170
+ self.logger.error(f"Failed to revert environment {env_name}: {e}")
171
+ return "failed"
172
+
173
+ async def _execute_env_operation(
174
+ self,
175
+ env_name: str,
176
+ status: str,
177
+ mode: str,
178
+ yaml_file_path: str,
179
+ version: int,
180
+ source_path=None,
181
+ log_file_location=None,
182
+ ):
183
+ """
184
+ Executes environment operations (create or clone).
185
+
186
+ Args:
187
+ env_name (str): Name of the environment
188
+ status (str): Environment status ('draft' or 'published')
189
+ mode (str): Operation mode ('create' or 'clone')
190
+ env_version (str): Version of the environment (for draft environments)
191
+ py_version (str): Python version to use (for create mode)
192
+ py_requirements (list): List of packages to install (for create mode)
193
+ source_path (str): Path to source environment (for clone mode)
194
+ log_file_location (str): Path to log file
195
+
196
+ Returns:
197
+ str: Build status ('success' or 'failed')
198
+ """
199
+ self.logger.info(f"Executing environment operation: {env_name}, Status: {status}, Mode: {mode}")
200
+ status = status.lower()
201
+ conda_env_path = os.path.join(self.env_base_path, env_name)
202
+
203
+ try:
204
+ if os.path.exists(conda_env_path) and mode == "create":
205
+ raise FileExistsError(f"Environment '{env_name}' already exists at {conda_env_path}.")
206
+
207
+ os.makedirs(conda_env_path, exist_ok=True)
208
+
209
+ if mode == "create":
210
+ create_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
211
+ command = ["bash", create_env_script_path, yaml_file_path, conda_env_path]
212
+
213
+ elif mode == "update":
214
+ update_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/update_environment.sh')
215
+ command = ["bash", update_env_script_path, yaml_file_path, conda_env_path]
216
+
217
+ elif mode == "clone":
218
+ clone_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/clone_environment.sh')
219
+ command = ["bash", clone_env_script_path, source_path, conda_env_path]
220
+
221
+ else:
222
+ raise ValueError("Invalid mode. Use 'create', 'update', or 'clone'.")
223
+
224
+ process = await asyncio.create_subprocess_exec(
225
+ *command,
226
+ stdout=asyncio.subprocess.PIPE,
227
+ stderr=asyncio.subprocess.PIPE
228
+ )
229
+
230
+ if not log_file_location:
231
+ return process
232
+
233
+ with open(log_file_location, "a") as log_file:
234
+ success_detected = False
235
+ try:
236
+ # Write an initial log entry to indicate the operation has started
237
+ start_message = {
238
+ "timestamp": self.format_timestamp(),
239
+ "type": "log",
240
+ "content": f"Starting environment {mode} operation for {env_name}"
241
+ }
242
+ log_file.write(json.dumps(start_message) + "\n")
243
+ log_file.flush()
244
+
245
+ # Process stdout line by line
246
+ while True:
247
+ line = await process.stdout.readline()
248
+ if not line:
249
+ break
250
+
251
+ line = line.decode()
252
+ message = {
253
+ "timestamp": self.format_timestamp(),
254
+ "type": "log",
255
+ "content": line.strip()
256
+ }
257
+ log_file.write(json.dumps(message) + "\n")
258
+ log_file.flush()
259
+
260
+ if "environment creation successful" in line.lower():
261
+ success_detected = True
262
+
263
+ await process.wait() # Ensure process is complete
264
+
265
+ if process.returncode != 0:
266
+ error_message = await process.stderr.read()
267
+ error_message = error_message.decode().strip()
268
+ error_message_dict = {
269
+ "timestamp": self.format_timestamp(),
270
+ "type": "error",
271
+ "content": error_message
272
+ }
273
+ log_file.write(json.dumps(error_message_dict) + "\n")
274
+
275
+ final_build_status = "failed" if process.returncode != 0 else "success"
276
+
277
+ except asyncio.CancelledError:
278
+ process.kill()
279
+ msg_content = "Environment operation cancelled due to request cancellation."
280
+ cancellation_message = {
281
+ "timestamp": self.format_timestamp(),
282
+ "type": "error",
283
+ "content": msg_content
284
+ }
285
+ log_file.write(json.dumps(cancellation_message) + "\n")
286
+ final_build_status = "failed"
287
+
288
+ finally:
289
+ if final_build_status != "success" and version == 1:
290
+ if os.path.exists(conda_env_path):
291
+ shutil.rmtree(conda_env_path)
292
+
293
+ return final_build_status
294
+
295
+ except OSError as e:
296
+ self.logger.error(f"OS error while operating on {conda_env_path}: {e}")
297
+ return "failed"
298
+ except subprocess.CalledProcessError as e:
299
+ self.logger.error(f"Subprocess error during environment operation: {e}")
300
+ return "failed"
301
+ except Exception as e:
302
+ self.logger.error(f"Unexpected error during environment operation for {env_name}: {e}")
303
+ return "failed"
304
+
305
+ def _setup_logging(self, env_name: str, env_version: str, user_name: str, db: Session):
306
+ """
307
+ Sets up logging for environment operations.
308
+
309
+ Args:
310
+ env_name (str): Name of the environment
311
+ env_version (str): Version of the environment
312
+ user_name (str): Username who initiated the operation
313
+ db (Session): Database session
314
+
315
+ Returns:
316
+ str: Path to the log file
317
+ """
318
+ versioned_name = f"{env_name}_v{env_version}"
319
+ log_file_name = f"envlog_{versioned_name}.log"
320
+ log_file_dir = self.config.get_config_value('paths', 'env_logs_path')
321
+ os.makedirs(log_file_dir, exist_ok=True)
322
+ log_file_location = os.path.join(log_file_dir, log_file_name)
323
+
324
+ # Clear log file if it exists
325
+ if os.path.exists(log_file_location):
326
+ open(log_file_location, "w").close()
327
+
328
+ # Create job entry
329
+ self.create_job_entry(user_name, db, log_file_name, log_file_location)
330
+
331
+ return log_file_location
332
+
333
+ async def _update_job_status(self, log_file_name: str, build_status: str, log_file_location: str, db: Session):
334
+ """
335
+ Updates job status with retry logic.
336
+
337
+ Args:
338
+ db (Session): Database session
339
+ log_file_name (str): Name of the log file
340
+ build_status (str): Build status ('success' or 'failed')
341
+ log_file_location (str): Path to the log file
342
+ """
343
+ attempts = 3
344
+ retry_delay = 3
345
+
346
+ while attempts > 0:
347
+ try:
348
+ self.update_job_log(db, log_file_name, build_status)
349
+ break
350
+ except Exception as e:
351
+ attempts -= 1
352
+
353
+ with open(log_file_location, "a") as log_file:
354
+ msg_content = "Failed to commit job completion time to database."
355
+ error_message = {
356
+ "timestamp": self.format_timestamp(),
357
+ "type": "error",
358
+ "content": msg_content
359
+ }
360
+ log_file.write(json.dumps(error_message) + "\n")
361
+
362
+ if attempts > 0:
363
+ await asyncio.sleep(retry_delay)
364
+ else:
365
+ self.logger.error(f"Failed to update job log after multiple attempts: {e}")
366
+
367
+ def create_job_entry(self, user_name: str, db: Session, log_file_name: str, log_file_location: str):
368
+ """
369
+ Creates or updates a job entry for environment tracking.
370
+
371
+ Args:
372
+ user_name (str): The user who initiated the job
373
+ db (Session): Database session
374
+ log_file_name (str): Log file name
375
+ log_file_location (str): Log file path
376
+
377
+ Returns:
378
+ JobLogs: The created or updated job entry
379
+ """
380
+ job = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()
381
+
382
+ if job:
383
+ if job.status == "success":
384
+ self.logger.error(f"Job with log_file_name '{log_file_name}' already completed successfully.")
385
+ raise ValueError(f"Job with log_file_name '{log_file_name}' already completed successfully.")
386
+ if job.status == "failed":
387
+ job.created_at = datetime.datetime.now()
388
+ job.status = "in_progress"
389
+ else:
390
+ job = JobLogs(
391
+ created_at=datetime.datetime.now(),
392
+ log_file_name=log_file_name,
393
+ log_file_location=log_file_location,
394
+ created_by=user_name,
395
+ status="in_progress"
396
+ )
397
+ db.add(job)
398
+
399
+ db.commit()
400
+ return job
401
+
402
+ def update_job_log(self, db, log_file_name, final_build_status):
403
+ """
404
+ Updates the JobLogs table with completion time and status.
405
+
406
+ Args:
407
+ db (Session): Database session
408
+ log_file_name (str): Name of the log file
409
+ final_build_status (str): Final status of the build ('success' or 'failed')
410
+ """
411
+ try:
412
+ job_record = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()
413
+ if job_record:
414
+ job_record.completed_at = datetime.datetime.now()
415
+ job_record.status = final_build_status
416
+ db.commit()
417
+ else:
418
+ self.logger.error(f"No job log found for file: {log_file_name}")
419
+ raise ValueError(f"No job log found for file: {log_file_name}")
420
+ except Exception as e:
421
+ self.logger.error(f"Failed to update job log for {log_file_name}: {e}")
422
+ db.rollback()
423
+ raise
424
+
425
+ def format_timestamp(self):
426
+ """
427
+ Generates a formatted timestamp string representing the current date and time.
428
+
429
+ Returns:
430
+ str: A string representing the current date and time in the specified format.
431
+ """
432
+ return datetime.datetime.now().strftime("%b %d %I:%M:%S %p")
433
+
434
+ def update_environment_db(self, env_short_name, version, pip_libraries, conda_libraries, status, db: Session):
435
+ """
436
+ Updates the environment table with the new version and libraries.
437
+ """
438
+ try:
439
+ if isinstance(pip_libraries, list):
440
+ pip_libraries = ", ".join(pip_libraries)
441
+ if isinstance(conda_libraries, list):
442
+ conda_libraries = ", ".join(conda_libraries)
443
+ current_env = db.query(Environment).filter(Environment.short_name == env_short_name).first()
444
+ if not current_env:
445
+ raise ValueError(f"Environment with short name '{env_short_name}' does not exist.")
446
+
447
+ env_status = "Draft" if status == "success" else "Failed"
448
+
449
+ db.query(Environment).filter(
450
+ Environment.short_name == env_short_name
451
+ ).update({"version": version, "pip_libraries": pip_libraries, "conda_libraries": conda_libraries, "status": env_status})
452
+ db.commit()
453
+
454
+ except Exception as e:
455
+ self.logger.error(f"Failed to update environment {env_short_name} in database: {e}")
456
+ db.rollback()
457
+ raise
458
+
459
+ def update_library_versions(self, yaml_path: str):
460
+ """
461
+ Updates libraries without version specifications by getting their actual installed versions from a conda YAML file.
462
+
463
+ Args:
464
+ yaml_path (str): Path to the conda environment YAML file.
465
+
466
+ Returns:
467
+ tuple: Updated lists of (pip_libraries, conda_libraries) with version specifications.
468
+ """
469
+ try:
470
+ # Define default conda packages to ignore
471
+ default_conda_packages = {
472
+ "_libgcc_mutex", "_openmp_mutex", "bzip2", "ca-certificates",
473
+ "ld_impl_linux-64", "libexpat", "libffi", "libgcc", "libgcc-ng",
474
+ "libgomp", "liblzma", "libnsl", "libsqlite", "libuuid", "libxcrypt",
475
+ "libzlib", "ncurses", "openssl", "readline", "setuptools", "tk",
476
+ "tzdata", "wheel", "libstdcxx-ng", "python"
477
+ }
478
+
479
+ # Read the YAML file
480
+ with open(yaml_path, 'r') as f:
481
+ yaml_content = yaml.safe_load(f)
482
+
483
+ # Extract conda and pip dependencies
484
+ dependencies = yaml_content.get('dependencies', [])
485
+
486
+ # Process conda libraries
487
+ conda_libraries = []
488
+ pip_libraries = []
489
+
490
+ for dep in dependencies:
491
+ if isinstance(dep, str):
492
+ if dep.startswith("python="):
493
+ continue
494
+
495
+ parts = dep.split('=')
496
+ package_name = parts[0].strip()
497
+
498
+ if package_name.lower() not in default_conda_packages:
499
+ if len(parts) >= 2:
500
+ package_with_version = f"{package_name}={parts[1]}"
501
+ conda_libraries.append(package_with_version)
502
+ else:
503
+ # No version specified, keep as is
504
+ conda_libraries.append(dep)
505
+
506
+ elif isinstance(dep, dict) and 'pip' in dep:
507
+ # This is the pip section
508
+ for pip_pkg in dep['pip']:
509
+ pip_libraries.append(pip_pkg)
510
+
511
+ return pip_libraries, conda_libraries
512
+
513
+ except Exception as e:
514
+ self.logger.error(f"Error reading YAML file and extracting libraries: {str(e)}")
515
+ return [], []
516
+
517
+ except subprocess.CalledProcessError as e:
518
+ self.logger.error(f"Error running pip freeze: {e.stderr}")
519
+ return pip_libraries
520
+ except Exception as e:
521
+ self.logger.error(f"Error updating library versions: {str(e)}")
522
+ return pip_libraries
523
+
524
+ def create_conda_yaml(self, yaml_path, env_name, python_version, conda_packages, pip_packages):
525
+ """
526
+ Creates a conda environment YAML file with specified packages and channels.
527
+
528
+ Args:
529
+ yaml_path (str): Path where to save the YAML file
530
+ env_name (str): Name of the conda environment
531
+ python_version (str): Python version to use
532
+ conda_channels (list): List of conda channels
533
+ conda_packages (list): List of conda packages to install
534
+ pip_packages (list): List of pip packages to install
535
+ """
536
+ try:
537
+ # Create the environment specification
538
+ env_spec = {
539
+ "name": env_name,
540
+ "channels": ["conda-forge", "defaults"],
541
+ "dependencies": [
542
+ f"python={python_version}"
543
+ ]
544
+ }
545
+
546
+ # Add conda packages
547
+ if conda_packages and len(conda_packages) > 0:
548
+ env_spec["dependencies"].extend(conda_packages)
549
+
550
+ pip_pattern = re.compile(r"^pip([=]{1,2}.*)?$") # matches pip, pip=..., pip==...
551
+ pip_found = any(pip_pattern.match(pkg.strip()) for pkg in conda_packages)
552
+
553
+ # if pip is not already included in conda packages, add it
554
+ if not pip_found:
555
+ env_spec["dependencies"].append("pip")
556
+
557
+ # Add pip packages if any
558
+ if pip_packages and len(pip_packages) > 0:
559
+ pip_section = {
560
+ "pip": pip_packages
561
+ }
562
+ env_spec["dependencies"].append(pip_section)
563
+
564
+ with open(yaml_path, 'w') as yaml_file:
565
+ yaml.dump(env_spec, yaml_file, default_flow_style=False)
566
+
567
+ return yaml_path
568
+
569
+ except Exception as e:
570
+ self.logger.error(f"Failed to create conda environment YAML file: {str(e)}")
571
+ raise Exception(f"Failed to create conda environment YAML file: {str(e)}")
572
+
573
+ def format_py_requirements(self, env):
574
+ """
575
+ Format pip and conda libraries into a standardized list of dictionaries
576
+ sorted alphabetically by library name.
577
+
578
+ Args:
579
+ env: Environment object containing pip_libraries and conda_libraries strings
580
+
581
+ Returns:
582
+ list: List of dictionaries with format [{"name":"lib_name", "version":"version", "manager":"pip|conda"}, ...]
583
+ """
584
+ py_requirements = []
585
+
586
+ # process libraries, handle both '==' and '=' version specifications
587
+ if env.pip_libraries:
588
+ for lib in env.pip_libraries.split(','):
589
+ lib = lib.strip()
590
+ if not lib:
591
+ continue
592
+
593
+ if '==' in lib:
594
+ name, version = lib.split('==', 1)
595
+ py_requirements.append({
596
+ "name": name.strip(),
597
+ "version": version.strip(),
598
+ "manager": "pip"
599
+ })
600
+ elif '=' in lib:
601
+ name, version = lib.split('=', 1)
602
+ py_requirements.append({
603
+ "name": name.strip(),
604
+ "version": version.strip(),
605
+ "manager": "pip"
606
+ })
607
+ else:
608
+ py_requirements.append({
609
+ "name": lib,
610
+ "version": "",
611
+ "manager": "pip"
612
+ })
613
+
614
+ if env.conda_libraries:
615
+ for lib in env.conda_libraries.split(','):
616
+ lib = lib.strip()
617
+ if not lib:
618
+ continue
619
+
620
+ if '==' in lib:
621
+ name, version = lib.split('==', 1)
622
+ py_requirements.append({
623
+ "name": name.strip(),
624
+ "version": version.strip(),
625
+ "manager": "conda"
626
+ })
627
+ elif '=' in lib:
628
+ name, version = lib.split('=', 1)
629
+ py_requirements.append({
630
+ "name": name.strip(),
631
+ "version": version.strip(),
632
+ "manager": "conda"
633
+ })
634
+ else:
635
+ py_requirements.append({
636
+ "name": lib,
637
+ "version": "",
638
+ "manager": "conda"
639
+ })
640
+
641
+ # sort the requirements list alphabetically by name
642
+ py_requirements.sort(key=lambda x: x["name"].lower())
643
+
644
+ return py_requirements
@@ -15,22 +15,21 @@ class EnvironmentAttributes(Base):
15
15
  enabled = Column(Boolean, default=True)
16
16
  version = Column(String, default=0)
17
17
  is_latest = Column(Boolean, default=True)
18
- base_image_id = Column(Integer, default=None)
18
+ base_env_id = Column(Integer, default=None)
19
19
  short_name = Column(String(5))
20
20
  status = Column(String, default="Saved")
21
21
  icon = Column(String)
22
22
  py_version = Column(String)
23
23
  r_version = Column(String)
24
- py_requirements = Column(Text)
24
+ pip_libraries = Column(Text)
25
+ conda_libraries = Column(Text)
25
26
  r_requirements = Column(Text)
26
- py_requirements_compiled = Column(Text)
27
- r_requirements_compiled = Column(Text)
28
27
  created_date = Column(DateTime, server_default=func.now())
29
28
  created_by = Column(String)
30
29
 
31
30
 
32
31
 
33
- class Environment(EnvironmentAttributes):
32
+ class Environment(EnvironmentAttributes):
34
33
  __tablename__ = 'ENVIRONMENT'
35
34
 
36
35
  id = Column(Integer, primary_key=True, autoincrement=True)
@@ -43,7 +42,7 @@ class ArchivedEnvironment(EnvironmentAttributes):
43
42
  __tablename__ = 'ARCHIVED_ENVIRONMENT'
44
43
 
45
44
  id = Column(Integer, primary_key=True, autoincrement=True)
46
- original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id'))
45
+ original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'))
47
46
  is_latest = Column(Boolean, default=False)
48
47
 
49
48
  # Relationship with Environment