dataflow-core 2.1.2__py3-none-any.whl → 2.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dataflow/environment.py CHANGED
@@ -1,18 +1,21 @@
1
- import os, shutil, subprocess, datetime
1
+ import os, shutil, subprocess, datetime, yaml, re
2
2
  from .models.environment import JobLogs, Environment
3
3
  import json, asyncio, pkg_resources
4
4
  from sqlalchemy.orm import Session
5
5
  from .configuration import ConfigurationManager
6
+ from .utils.logger import CustomLogger
6
7
 
7
8
  class EnvironmentManager:
8
9
  def __init__(self):
9
10
  """Initialize the EnvironmentManager"""
10
11
  self.config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
11
- self.published_env_path = self.config.get_config_value('paths', 'published_env_path')
12
- self.draft_env_path = self.config.get_config_value('paths', 'drafts_env_path')
12
+ self.env_base_path = self.config.get_config_value('paths', 'env_path')
13
13
  self.env_logs_path = self.config.get_config_value('paths', 'env_logs_path')
14
+ self.env_version_path = self.config.get_config_value('paths', 'env_versions_path')
15
+ os.makedirs(self.env_version_path, exist_ok=True)
16
+ self.logger = CustomLogger().get_logger(__name__)
14
17
 
15
- async def create_env(self, env_name, py_version, py_requirements, status, base_env_id, env_version=None, user_name=None, db:Session=None):
18
+ async def create_env(self, env_name, py_version, pip_libraries, conda_libraries, status, env_version='1', user_name=None, db:Session=None):
16
19
  """
17
20
  Creates a conda environment with specified Python version and packages.
18
21
 
@@ -26,47 +29,56 @@ class EnvironmentManager:
26
29
  db (Session): Database session (optional, will create if None)
27
30
 
28
31
  Returns:
29
- str: Build status ('success' or 'fail')
32
+ str: Build status ('success' or 'failed')
30
33
  """
31
34
  # Set up logging
32
35
  log_file_location = None
33
36
  if db:
34
37
  log_file_location = self._setup_logging(env_name, env_version, user_name, db)
35
38
 
39
+ # Create the conda environment YAML file
40
+ yaml_path = os.path.join(self.env_version_path, f"{env_name}_v{env_version}.yaml")
41
+ self.create_conda_yaml(
42
+ yaml_path=yaml_path,
43
+ env_name=env_name,
44
+ python_version=py_version,
45
+ conda_packages=conda_libraries,
46
+ pip_packages=pip_libraries
47
+ )
48
+
36
49
  if status == "published":
37
50
  return await self._execute_env_operation(
38
51
  env_name=env_name,
39
- py_version=py_version,
40
- py_requirements=py_requirements,
41
52
  status="published",
42
- env_version=None,
43
- mode="create"
53
+ mode="create",
54
+ yaml_file_path=yaml_path,
55
+ version=int(env_version)
44
56
  )
45
57
  elif status == "draft":
46
- # Build the environment
58
+ mode = "create" if env_version == '1' else "update"
47
59
  build_status = await self._execute_env_operation(
48
60
  env_name=env_name,
49
- py_version=py_version,
50
- py_requirements=py_requirements,
51
61
  status=status,
52
- env_version=env_version,
62
+ mode=mode,
63
+ yaml_file_path=yaml_path,
53
64
  log_file_location=log_file_location,
54
- mode="create"
65
+ version=int(env_version)
55
66
  )
56
67
 
57
68
  # Update job log status if db was provided
58
69
  if db and log_file_location:
59
70
  log_file_name = os.path.basename(log_file_location)
60
71
  await self._update_job_status(log_file_name, build_status, log_file_location, db)
61
- updated_py_requirements = self.update_library_versions(py_requirements, os.path.join(self.draft_env_path, env_name, f"{env_name}_v{env_version}"))
62
- self.update_environment_db(env_name, env_version, updated_py_requirements, base_env_id, py_version, db)
72
+ pip_libraries, conda_libraries = self.update_library_versions(yaml_path)
73
+ self.update_environment_db(env_name, env_version, pip_libraries, conda_libraries, build_status, db)
63
74
 
64
75
  return build_status
65
76
 
66
77
  else:
78
+ self.logger.error(f"Invalid status '{status}' provided for environment creation.")
67
79
  raise ValueError("Invalid status. Use 'draft' or 'published'.")
68
80
 
69
- async def clone_env(self, source_path, target_env_name, libraries, py_version, user_name=None, db: Session=None):
81
+ async def clone_env(self, source_path, env_name, pip_libraries, conda_libraries, user_name, db=None):
70
82
  """
71
83
  Clones an existing conda environment.
72
84
 
@@ -79,56 +91,95 @@ class EnvironmentManager:
79
91
  db (Session): Database session (optional, will create if None)
80
92
 
81
93
  Returns:
82
- str: Build status ('success' or 'fail')
94
+ str: Build status ('success' or 'failed')
83
95
  """
84
96
  # Set up logging
85
97
  log_file_location = None
86
98
  if db:
87
- log_file_location = self._setup_logging(target_env_name, "1", user_name, db)
99
+ log_file_location = self._setup_logging(env_name, "1", user_name, db)
100
+
101
+ yaml_path = f"{self.env_version_path}/{env_name}_v1.yaml"
88
102
 
89
103
  # Perform the clone operation
90
104
  clone_status = await self._execute_env_operation(
91
- env_name=target_env_name,
92
- source_path=source_path,
105
+ env_name=env_name,
93
106
  status="draft",
94
- env_version="1",
107
+ mode="clone",
108
+ yaml_file_path=yaml_path,
109
+ source_path=source_path,
95
110
  log_file_location=log_file_location,
96
- mode="clone"
111
+ version=1
97
112
  )
98
113
 
99
114
  # Update job log status if db was provided
100
115
  if db and log_file_location:
101
116
  log_file_name = os.path.basename(log_file_location)
102
117
  await self._update_job_status(log_file_name, clone_status, log_file_location, db)
103
- self.update_environment_db(env_short_name=target_env_name, version="1", libraries=libraries, base_env_id=None, py_version=py_version, db=db)
118
+ self.update_environment_db(
119
+ env_short_name=env_name,
120
+ version="1",
121
+ pip_libraries=pip_libraries,
122
+ conda_libraries=conda_libraries,
123
+ status=clone_status,
124
+ db=db
125
+ )
104
126
 
105
127
  return clone_status
106
128
 
107
- async def create_published_env(self, env_name, py_version, py_requirements):
129
+ async def revert_env(self, env_name, curr_version, revert_version, new_version, user_name, db: Session):
108
130
  """
109
- Creates a published conda environment.
131
+ Reverts an environment to a previous version.
110
132
 
111
133
  Args:
112
134
  env_name (str): Name of the environment
113
- py_version (str): Python version to use
114
- py_requirements (list): List of packages to install
115
- status (str): Environment status ('draft' or 'published')
116
- env_version (str): Version of the environment (for draft environments)
117
- user_name (str): Username who initiated the creation
135
+ version (str): Version to revert to
136
+ db (Session): Database session
118
137
 
119
138
  Returns:
120
- str: Build status ('success' or 'fail')
139
+ str: Build status ('success' or 'failed')
121
140
  """
122
- return self._execute_env_operation(
123
- env_name=env_name,
124
- py_version=py_version,
125
- py_requirements=py_requirements,
126
- status="published",
127
- env_version=None,
128
- mode="create"
129
- )
141
+ try:
142
+ # Get the YAML file for the specified version
143
+ old_yaml_path = f"{self.env_version_path}/{env_name}_v{revert_version}.yaml"
144
+ new_yaml_path = f"{self.env_version_path}/{env_name}_v{new_version}.yaml"
145
+ if not os.path.exists(old_yaml_path):
146
+ raise FileNotFoundError(f"YAML file for version {revert_version} does not exist.")
147
+
148
+ os.symlink(old_yaml_path, new_yaml_path)
149
+
150
+ log_file_location = None
151
+ if db:
152
+ log_file_location = self._setup_logging(env_name, new_version, user_name, db)
153
+
154
+ # Execute the revert operation
155
+ revert_status = await self._execute_env_operation(
156
+ env_name=env_name,
157
+ status="draft",
158
+ mode="update",
159
+ yaml_file_path=new_yaml_path,
160
+ log_file_location=log_file_location,
161
+ version=int(new_version)
162
+ )
163
+
164
+ log_file_name = os.path.basename(log_file_location)
165
+ await self._update_job_status(log_file_name, revert_status, log_file_location, db)
166
+ pip_libraries, conda_libraries = self.update_library_versions(new_yaml_path)
167
+ self.update_environment_db(env_name, new_version, pip_libraries, conda_libraries, revert_status, db)
130
168
 
131
- async def _execute_env_operation(self, env_name: str, status: str, mode: str, env_version: str = None, py_version: str = None, py_requirements=None, source_path=None, log_file_location=None):
169
+ except Exception as e:
170
+ self.logger.error(f"Failed to revert environment {env_name}: {e}")
171
+ return "failed"
172
+
173
+ async def _execute_env_operation(
174
+ self,
175
+ env_name: str,
176
+ status: str,
177
+ mode: str,
178
+ yaml_file_path: str,
179
+ version: int,
180
+ source_path=None,
181
+ log_file_location=None,
182
+ ):
132
183
  """
133
184
  Executes environment operations (create or clone).
134
185
 
@@ -143,34 +194,32 @@ class EnvironmentManager:
143
194
  log_file_location (str): Path to log file
144
195
 
145
196
  Returns:
146
- str: Build status ('success' or 'fail')
197
+ str: Build status ('success' or 'failed')
147
198
  """
199
+ self.logger.info(f"Executing environment operation: {env_name}, Status: {status}, Mode: {mode}")
148
200
  status = status.lower()
149
- if status == "published":
150
- env_base_path = self.config.get_config_value('paths', 'published_env_path')
151
- conda_env_path = os.path.join(env_base_path, env_name)
152
- else:
153
- env_base_path = self.config.get_config_value('paths', 'drafts_env_path')
154
- conda_env_path = os.path.join(env_base_path, env_name, f"{env_name}_v{env_version}")
201
+ conda_env_path = os.path.join(self.env_base_path, env_name)
155
202
 
156
203
  try:
157
- if not os.path.exists(conda_env_path):
158
- os.makedirs(conda_env_path, exist_ok=True)
204
+ if os.path.exists(conda_env_path) and mode == "create":
205
+ raise FileExistsError(f"Environment '{env_name}' already exists at {conda_env_path}.")
206
+
207
+ os.makedirs(conda_env_path, exist_ok=True)
159
208
 
160
- if mode == "create":
161
- # Convert requirements list to comma-separated string
162
- if isinstance(py_requirements, list):
163
- py_requirements = ",".join(py_requirements)
164
-
209
+ if mode == "create":
165
210
  create_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
166
- command = ["bash", create_env_script_path, py_requirements, conda_env_path, py_version]
167
-
211
+ command = ["bash", create_env_script_path, yaml_file_path, conda_env_path]
212
+
213
+ elif mode == "update":
214
+ update_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/update_environment.sh')
215
+ command = ["bash", update_env_script_path, yaml_file_path, conda_env_path]
216
+
168
217
  elif mode == "clone":
169
218
  clone_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/clone_environment.sh')
170
219
  command = ["bash", clone_env_script_path, source_path, conda_env_path]
171
220
 
172
221
  else:
173
- raise ValueError("Invalid mode. Use 'create' or 'clone'.")
222
+ raise ValueError("Invalid mode. Use 'create', 'update', or 'clone'.")
174
223
 
175
224
  process = await asyncio.create_subprocess_exec(
176
225
  *command,
@@ -184,6 +233,16 @@ class EnvironmentManager:
184
233
  with open(log_file_location, "a") as log_file:
185
234
  success_detected = False
186
235
  try:
236
+ # Write an initial log entry to indicate the operation has started
237
+ start_message = {
238
+ "timestamp": self.format_timestamp(),
239
+ "type": "log",
240
+ "content": f"Starting environment {mode} operation for {env_name}"
241
+ }
242
+ log_file.write(json.dumps(start_message) + "\n")
243
+ log_file.flush()
244
+
245
+ # Process stdout line by line
187
246
  while True:
188
247
  line = await process.stdout.readline()
189
248
  if not line:
@@ -213,7 +272,7 @@ class EnvironmentManager:
213
272
  }
214
273
  log_file.write(json.dumps(error_message_dict) + "\n")
215
274
 
216
- final_build_status = "fail" if process.returncode != 0 else "success"
275
+ final_build_status = "failed" if process.returncode != 0 else "success"
217
276
 
218
277
  except asyncio.CancelledError:
219
278
  process.kill()
@@ -224,27 +283,24 @@ class EnvironmentManager:
224
283
  "content": msg_content
225
284
  }
226
285
  log_file.write(json.dumps(cancellation_message) + "\n")
227
- final_build_status = "fail"
286
+ final_build_status = "failed"
228
287
 
229
288
  finally:
230
- if final_build_status == "success" and status == "draft":
231
- symlink_path = os.path.join(env_base_path, env_name, "default")
232
- self.update_symlink(symlink_path, conda_env_path)
233
- elif final_build_status != "success":
289
+ if final_build_status != "success" and version == 1:
234
290
  if os.path.exists(conda_env_path):
235
291
  shutil.rmtree(conda_env_path)
236
292
 
237
293
  return final_build_status
238
294
 
239
295
  except OSError as e:
240
- print(f"OS error while operating on {conda_env_path}: {e}")
241
- return "fail"
296
+ self.logger.error(f"OS error while operating on {conda_env_path}: {e}")
297
+ return "failed"
242
298
  except subprocess.CalledProcessError as e:
243
- print(f"Subprocess error during environment operation: {e}")
244
- return "fail"
299
+ self.logger.error(f"Subprocess error during environment operation: {e}")
300
+ return "failed"
245
301
  except Exception as e:
246
- print(f"Unexpected error during environment operation for {env_name}: {e}")
247
- return "fail"
302
+ self.logger.error(f"Unexpected error during environment operation for {env_name}: {e}")
303
+ return "failed"
248
304
 
249
305
  def _setup_logging(self, env_name: str, env_version: str, user_name: str, db: Session):
250
306
  """
@@ -281,7 +337,7 @@ class EnvironmentManager:
281
337
  Args:
282
338
  db (Session): Database session
283
339
  log_file_name (str): Name of the log file
284
- build_status (str): Build status ('success' or 'fail')
340
+ build_status (str): Build status ('success' or 'failed')
285
341
  log_file_location (str): Path to the log file
286
342
  """
287
343
  attempts = 3
@@ -306,7 +362,7 @@ class EnvironmentManager:
306
362
  if attempts > 0:
307
363
  await asyncio.sleep(retry_delay)
308
364
  else:
309
- print(f"Failed to update job log after multiple attempts: {e}")
365
+ self.logger.error(f"Failed to update job log after multiple attempts: {e}")
310
366
 
311
367
  def create_job_entry(self, user_name: str, db: Session, log_file_name: str, log_file_location: str):
312
368
  """
@@ -325,8 +381,9 @@ class EnvironmentManager:
325
381
 
326
382
  if job:
327
383
  if job.status == "success":
384
+ self.logger.error(f"Job with log_file_name '{log_file_name}' already completed successfully.")
328
385
  raise ValueError(f"Job with log_file_name '{log_file_name}' already completed successfully.")
329
- if job.status == "fail":
386
+ if job.status == "failed":
330
387
  job.created_at = datetime.datetime.now()
331
388
  job.status = "in_progress"
332
389
  else:
@@ -349,7 +406,7 @@ class EnvironmentManager:
349
406
  Args:
350
407
  db (Session): Database session
351
408
  log_file_name (str): Name of the log file
352
- final_build_status (str): Final status of the build ('success' or 'fail')
409
+ final_build_status (str): Final status of the build ('success' or 'failed')
353
410
  """
354
411
  try:
355
412
  job_record = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()
@@ -358,24 +415,12 @@ class EnvironmentManager:
358
415
  job_record.status = final_build_status
359
416
  db.commit()
360
417
  else:
418
+ self.logger.error(f"No job log found for file: {log_file_name}")
361
419
  raise ValueError(f"No job log found for file: {log_file_name}")
362
420
  except Exception as e:
421
+ self.logger.error(f"Failed to update job log for {log_file_name}: {e}")
363
422
  db.rollback()
364
423
  raise
365
-
366
- def update_symlink(self, symlink_path, conda_env_path):
367
- """
368
- Creates or updates the symlink to point to the default version.
369
- """
370
- symlink_dir = os.path.dirname(symlink_path)
371
- if not os.path.exists(symlink_dir):
372
- os.makedirs(symlink_dir, exist_ok=True)
373
-
374
- # If symlink exists, remove it before updating
375
- if os.path.islink(symlink_path):
376
- os.remove(symlink_path)
377
-
378
- subprocess.run(["ln", "-sf", conda_env_path, symlink_path], check=True)
379
424
 
380
425
  def format_timestamp(self):
381
426
  """
@@ -386,73 +431,214 @@ class EnvironmentManager:
386
431
  """
387
432
  return datetime.datetime.now().strftime("%b %d %I:%M:%S %p")
388
433
 
389
- def update_environment_db(self, env_short_name, version, libraries, base_env_id, py_version, db: Session):
434
+ def update_environment_db(self, env_short_name, version, pip_libraries, conda_libraries, status, db: Session):
390
435
  """
391
436
  Updates the environment table with the new version and libraries.
392
437
  """
393
438
  try:
394
- if isinstance(libraries, list):
395
- libraries = ", ".join(libraries)
439
+ if isinstance(pip_libraries, list):
440
+ pip_libraries = ", ".join(pip_libraries)
441
+ if isinstance(conda_libraries, list):
442
+ conda_libraries = ", ".join(conda_libraries)
396
443
  current_env = db.query(Environment).filter(Environment.short_name == env_short_name).first()
397
- status = "Draft" if current_env and current_env.status == "Saved" else current_env.status
398
- db.query(Environment).filter(Environment.short_name == env_short_name).update({"version": version, "py_requirements": libraries,"base_image_id": base_env_id,"py_version": py_version,"status": status})
444
+ if not current_env:
445
+ raise ValueError(f"Environment with short name '{env_short_name}' does not exist.")
446
+
447
+ env_status = "Draft" if status == "success" else "Failed"
448
+
449
+ db.query(Environment).filter(
450
+ Environment.short_name == env_short_name
451
+ ).update({"version": version, "pip_libraries": pip_libraries, "conda_libraries": conda_libraries, "status": env_status})
399
452
  db.commit()
400
453
 
401
454
  except Exception as e:
455
+ self.logger.error(f"Failed to update environment {env_short_name} in database: {e}")
402
456
  db.rollback()
403
457
  raise
404
458
 
405
-
406
- def update_library_versions(self, libraries: list, conda_env_path: str) -> list:
459
+ def update_library_versions(self, yaml_path: str):
407
460
  """
408
- Updates libraries without version specifications by getting their actual installed versions.
461
+ Updates libraries without version specifications by getting their actual installed versions from a conda YAML file.
409
462
 
410
463
  Args:
411
- libraries (list): List of library requirements, some may not have version specs.
412
- conda_env_path (str): Path to the conda environment where libraries are installed.
464
+ yaml_path (str): Path to the conda environment YAML file.
413
465
 
414
466
  Returns:
415
- list: Updated list of libraries with version specifications.
467
+ tuple: Updated lists of (pip_libraries, conda_libraries) with version specifications.
416
468
  """
417
469
  try:
418
- pip_freeze_cmd = f"{conda_env_path}/bin/pip freeze"
419
- result = subprocess.run(
420
- pip_freeze_cmd,
421
- shell=True,
422
- capture_output=True,
423
- text=True,
424
- check=True
425
- )
470
+ # Define default conda packages to ignore
471
+ default_conda_packages = {
472
+ "_libgcc_mutex", "_openmp_mutex", "bzip2", "ca-certificates",
473
+ "ld_impl_linux-64", "libexpat", "libffi", "libgcc", "libgcc-ng",
474
+ "libgomp", "liblzma", "libnsl", "libsqlite", "libuuid", "libxcrypt",
475
+ "libzlib", "ncurses", "openssl", "readline", "setuptools", "tk",
476
+ "tzdata", "wheel", "libstdcxx-ng", "python"
477
+ }
426
478
 
427
- installed_versions = {}
428
- for line in result.stdout.splitlines():
429
- if "==" in line:
430
- lib_name, version = line.split("==", 1)
431
- installed_versions[lib_name.lower()] = version
479
+ # Read the YAML file
480
+ with open(yaml_path, 'r') as f:
481
+ yaml_content = yaml.safe_load(f)
432
482
 
433
- # Update libraries without version specs
434
- updated_libraries = []
435
- for lib in libraries:
436
- # Skip libraries that are python version specifications
437
- if lib.lower().startswith("python=="):
438
- continue
439
-
440
- if "==" not in lib:
441
- lib_name = lib.strip()
442
- lib_name_lower = lib_name.lower()
483
+ # Extract conda and pip dependencies
484
+ dependencies = yaml_content.get('dependencies', [])
485
+
486
+ # Process conda libraries
487
+ conda_libraries = []
488
+ pip_libraries = []
489
+
490
+ for dep in dependencies:
491
+ if isinstance(dep, str):
492
+ if dep.startswith("python="):
493
+ continue
443
494
 
444
- if lib_name_lower in installed_versions:
445
- updated_libraries.append(f"{lib_name}=={installed_versions[lib_name_lower]}")
446
- else:
447
- updated_libraries.append(lib)
448
- else:
449
- updated_libraries.append(lib)
495
+ parts = dep.split('=')
496
+ package_name = parts[0].strip()
450
497
 
451
- return updated_libraries
498
+ if package_name.lower() not in default_conda_packages:
499
+ if len(parts) >= 2:
500
+ package_with_version = f"{package_name}={parts[1]}"
501
+ conda_libraries.append(package_with_version)
502
+ else:
503
+ # No version specified, keep as is
504
+ conda_libraries.append(dep)
505
+
506
+ elif isinstance(dep, dict) and 'pip' in dep:
507
+ # This is the pip section
508
+ for pip_pkg in dep['pip']:
509
+ pip_libraries.append(pip_pkg)
510
+
511
+ return pip_libraries, conda_libraries
512
+
513
+ except Exception as e:
514
+ self.logger.error(f"Error reading YAML file and extracting libraries: {str(e)}")
515
+ return [], []
452
516
 
453
517
  except subprocess.CalledProcessError as e:
454
- print(f"Error running pip freeze: {e.stderr}")
455
- return libraries
518
+ self.logger.error(f"Error running pip freeze: {e.stderr}")
519
+ return pip_libraries
520
+ except Exception as e:
521
+ self.logger.error(f"Error updating library versions: {str(e)}")
522
+ return pip_libraries
523
+
524
+ def create_conda_yaml(self, yaml_path, env_name, python_version, conda_packages, pip_packages):
525
+ """
526
+ Creates a conda environment YAML file with specified packages and channels.
527
+
528
+ Args:
529
+ yaml_path (str): Path where to save the YAML file
530
+ env_name (str): Name of the conda environment
531
+ python_version (str): Python version to use
532
+ conda_channels (list): List of conda channels
533
+ conda_packages (list): List of conda packages to install
534
+ pip_packages (list): List of pip packages to install
535
+ """
536
+ try:
537
+ # Create the environment specification
538
+ env_spec = {
539
+ "name": env_name,
540
+ "channels": ["conda-forge", "defaults"],
541
+ "dependencies": [
542
+ f"python={python_version}"
543
+ ]
544
+ }
545
+
546
+ # Add conda packages
547
+ if conda_packages and len(conda_packages) > 0:
548
+ env_spec["dependencies"].extend(conda_packages)
549
+
550
+ pip_pattern = re.compile(r"^pip([=]{1,2}.*)?$") # matches pip, pip=..., pip==...
551
+ pip_found = any(pip_pattern.match(pkg.strip()) for pkg in conda_packages)
552
+
553
+ # if pip is not already included in conda packages, add it
554
+ if not pip_found:
555
+ env_spec["dependencies"].append("pip")
556
+
557
+ # Add pip packages if any
558
+ if pip_packages and len(pip_packages) > 0:
559
+ pip_section = {
560
+ "pip": pip_packages
561
+ }
562
+ env_spec["dependencies"].append(pip_section)
563
+
564
+ with open(yaml_path, 'w') as yaml_file:
565
+ yaml.dump(env_spec, yaml_file, default_flow_style=False)
566
+
567
+ return yaml_path
568
+
456
569
  except Exception as e:
457
- print(f"Error updating library versions: {str(e)}")
458
- return libraries
570
+ self.logger.error(f"Failed to create conda environment YAML file: {str(e)}")
571
+ raise Exception(f"Failed to create conda environment YAML file: {str(e)}")
572
+
573
+ def format_py_requirements(self, env):
574
+ """
575
+ Format pip and conda libraries into a standardized list of dictionaries
576
+ sorted alphabetically by library name.
577
+
578
+ Args:
579
+ env: Environment object containing pip_libraries and conda_libraries strings
580
+
581
+ Returns:
582
+ list: List of dictionaries with format [{"name":"lib_name", "version":"version", "manager":"pip|conda"}, ...]
583
+ """
584
+ py_requirements = []
585
+
586
+ # process libraries, handle both '==' and '=' version specifications
587
+ if env.pip_libraries:
588
+ for lib in env.pip_libraries.split(','):
589
+ lib = lib.strip()
590
+ if not lib:
591
+ continue
592
+
593
+ if '==' in lib:
594
+ name, version = lib.split('==', 1)
595
+ py_requirements.append({
596
+ "name": name.strip(),
597
+ "version": version.strip(),
598
+ "manager": "pip"
599
+ })
600
+ elif '=' in lib:
601
+ name, version = lib.split('=', 1)
602
+ py_requirements.append({
603
+ "name": name.strip(),
604
+ "version": version.strip(),
605
+ "manager": "pip"
606
+ })
607
+ else:
608
+ py_requirements.append({
609
+ "name": lib,
610
+ "version": "",
611
+ "manager": "pip"
612
+ })
613
+
614
+ if env.conda_libraries:
615
+ for lib in env.conda_libraries.split(','):
616
+ lib = lib.strip()
617
+ if not lib:
618
+ continue
619
+
620
+ if '==' in lib:
621
+ name, version = lib.split('==', 1)
622
+ py_requirements.append({
623
+ "name": name.strip(),
624
+ "version": version.strip(),
625
+ "manager": "conda"
626
+ })
627
+ elif '=' in lib:
628
+ name, version = lib.split('=', 1)
629
+ py_requirements.append({
630
+ "name": name.strip(),
631
+ "version": version.strip(),
632
+ "manager": "conda"
633
+ })
634
+ else:
635
+ py_requirements.append({
636
+ "name": lib,
637
+ "version": "",
638
+ "manager": "conda"
639
+ })
640
+
641
+ # sort the requirements list alphabetically by name
642
+ py_requirements.sort(key=lambda x: x["name"].lower())
643
+
644
+ return py_requirements
@@ -15,22 +15,21 @@ class EnvironmentAttributes(Base):
15
15
  enabled = Column(Boolean, default=True)
16
16
  version = Column(String, default=0)
17
17
  is_latest = Column(Boolean, default=True)
18
- base_image_id = Column(Integer, default=None)
18
+ base_env_id = Column(Integer, default=None)
19
19
  short_name = Column(String(5))
20
20
  status = Column(String, default="Saved")
21
21
  icon = Column(String)
22
22
  py_version = Column(String)
23
23
  r_version = Column(String)
24
- py_requirements = Column(Text)
24
+ pip_libraries = Column(Text)
25
+ conda_libraries = Column(Text)
25
26
  r_requirements = Column(Text)
26
- py_requirements_compiled = Column(Text)
27
- r_requirements_compiled = Column(Text)
28
27
  created_date = Column(DateTime, server_default=func.now())
29
28
  created_by = Column(String)
30
29
 
31
30
 
32
31
 
33
- class Environment(EnvironmentAttributes):
32
+ class Environment(EnvironmentAttributes):
34
33
  __tablename__ = 'ENVIRONMENT'
35
34
 
36
35
  id = Column(Integer, primary_key=True, autoincrement=True)
@@ -43,7 +42,7 @@ class ArchivedEnvironment(EnvironmentAttributes):
43
42
  __tablename__ = 'ARCHIVED_ENVIRONMENT'
44
43
 
45
44
  id = Column(Integer, primary_key=True, autoincrement=True)
46
- original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id'))
45
+ original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'))
47
46
  is_latest = Column(Boolean, default=False)
48
47
 
49
48
  # Relationship with Environment
@@ -10,7 +10,7 @@ class EnvironmentStatus(Base):
10
10
 
11
11
  __tablename__='ENVIRONMENT_STATUS'
12
12
 
13
- id = Column(Integer, ForeignKey('ENVIRONMENT.id'), primary_key=True, nullable=False)
13
+ id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'), primary_key=True, nullable=False)
14
14
  status = Column(String, nullable=False)
15
15
  comment = Column(String)
16
16
  status_changed_date = Column(DateTime, server_default=func.now(), nullable=False)
@@ -12,4 +12,6 @@ trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
12
12
  # 1. Cloning conda env
13
13
  conda create --clone ${source_env_name} --prefix ${target_env_path} --yes
14
14
 
15
+ conda env export --prefix "$conda_env_path" > "$yaml_file_path"
16
+
15
17
  echo "Environment Creation Successful"
@@ -1,17 +1,29 @@
1
1
  #!/bin/bash
2
+ # filepath: /home/hari/dbo/dataflow-core/dataflow/scripts/create_environment.sh
2
3
  set -e
3
4
 
4
- IFS=',' read -r -a libraries <<< $1
5
+ # Accept new parameters
6
+ yaml_file_path=$1
5
7
  conda_env_path=$2
6
- py_version=$3
8
+
9
+ # Validate inputs
10
+ if [ -z "$yaml_file_path" ] || [ -z "$conda_env_path" ]; then
11
+ echo "Error: Missing required parameters"
12
+ exit 1
13
+ fi
14
+
15
+ if [ ! -f "$yaml_file_path" ]; then
16
+ echo "Error: YAML file does not exist: $yaml_file_path"
17
+ exit 1
18
+ fi
7
19
 
8
20
  # Use an isolated conda package cache to avoid concurrency issues
9
21
  export CONDA_PKGS_DIRS=$(mktemp -d)
22
+
10
23
  # to delete conda package cache after script finishes
11
24
  trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
12
25
 
13
- # 1. Creating conda environment
14
- conda create --prefix ${conda_env_path} --yes python=${py_version}
26
+ # Create the conda environment from the YAML file
27
+ conda env create --file "$yaml_file_path" --prefix "$conda_env_path" --yes
15
28
 
16
- # 2. Install user libraries
17
- ${conda_env_path}/bin/pip install --root-user-action ignore ${libraries[@]}
29
+ conda env export --prefix "$conda_env_path" > "$yaml_file_path"
@@ -0,0 +1,36 @@
1
+ #!/bin/bash
2
+ # filepath: /home/hari/dbo/dataflow-core/dataflow/scripts/update_environment.sh
3
+ set -e
4
+
5
+ # Accept parameters
6
+ yaml_file_path=$1
7
+ conda_env_path=$2
8
+
9
+ # Validate inputs
10
+ if [ -z "$yaml_file_path" ] || [ -z "$conda_env_path" ]; then
11
+ echo "Error: Missing required parameters"
12
+ exit 1
13
+ fi
14
+
15
+ if [ ! -f "$yaml_file_path" ]; then
16
+ echo "Error: YAML file does not exist: $yaml_file_path"
17
+ exit 1
18
+ fi
19
+
20
+ if [ ! -d "$conda_env_path" ]; then
21
+ echo "Error: Conda environment does not exist at: $conda_env_path"
22
+ exit 1
23
+ fi
24
+
25
+ # Use an isolated conda package cache to avoid concurrency issues
26
+ export CONDA_PKGS_DIRS=$(mktemp -d)
27
+
28
+ # to delete conda package cache after script finishes
29
+ trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
30
+
31
+ # Update the conda environment using the YAML file
32
+ conda env update --prefix "$conda_env_path" --file "$yaml_file_path" --prune
33
+
34
+ if [ ! -L "$yaml_file_path" ]; then
35
+ conda env export --prefix "$conda_env_path" > "$yaml_file_path"
36
+ fi
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataflow-core
3
- Version: 2.1.2
3
+ Version: 2.1.3
4
4
  Summary: Dataflow core package
5
5
  Author: Dataflow
6
6
  Author-email:
@@ -7,13 +7,13 @@ dataflow/configuration.py,sha256=7To6XwH1eESiYp39eqPcswXWwrdBUdPF6xN6WnazOF0,663
7
7
  dataflow/database_manager.py,sha256=tJHMuOZ9Muskrh9t4uLRlTuFU0VkHAzoHlGP5DORIC4,899
8
8
  dataflow/dataflow.py,sha256=-UYZst7EO1GgaOjlAkKu-tu7RC6XsgadGeDp1MOvZiA,7439
9
9
  dataflow/db.py,sha256=5UwE4w5Vn9RqFIhr8ARlu2haZX-PtHDLRPjmn5BG2m8,1649
10
- dataflow/environment.py,sha256=04LwJwAjciBOz-EJJGzBA_BSQRKf587TJ30R8cDzvhg,19582
10
+ dataflow/environment.py,sha256=eAWYhQFM7CA3o-CA3qTWbkE8ZHZ-Jo2qvSkDQ4dKqmY,27238
11
11
  dataflow/models/__init__.py,sha256=QMLiKj8BMhfScWMm8kgHkMjwAlFeg5Cym3_AI1NvBUA,783
12
12
  dataflow/models/app_types.py,sha256=yE_ZB13lhpK7AZ7PyBwnQlf0RlIHYs_-vdMKx7_RMlY,379
13
13
  dataflow/models/blacklist_library.py,sha256=B2oi3Z8GcR_glhLAyinFk0W8c9txXvm3uOER6dY-q7I,991
14
14
  dataflow/models/connection.py,sha256=_VJL3KuIrm8t4lJmtunIL3-AXF9Yvi5wUolzdR3tE0E,1017
15
- dataflow/models/environment.py,sha256=HQW1L0qKksFR0qeQQbwcPjd-zcZwS1PEvzMOFrKgnpE,2142
16
- dataflow/models/environment_status.py,sha256=GnoAKI8GdCTqTq4HLhx16K2k--LMpjeJuRRRjGfAXoA,516
15
+ dataflow/models/environment.py,sha256=Vg-4vQe_cHIfOX-kPJWd6SIEapPJcgxoCITgdRrqT_o,2107
16
+ dataflow/models/environment_status.py,sha256=lvPDNUsUoTW9D97B07aKqJQHRKp4LvPM28pQDMPH1ac,536
17
17
  dataflow/models/git_ssh.py,sha256=W15SDypxzGOz_aZkHEnVZ6DIMVsjAsbSIXVIEt2mPYU,694
18
18
  dataflow/models/pinned_projects.py,sha256=rkpPX_f2U9HjmrRo7_K8rnZIeXuQKGq6hYTrtLmu21c,566
19
19
  dataflow/models/project_details.py,sha256=94wTygXv9iGB0w8g_6vtkB5ZqIzpEv1W9uWwCA4hM0Y,1078
@@ -29,15 +29,16 @@ dataflow/models/user.py,sha256=PT-zwZj7NWUubIj_7EY2EsjduMbI_42EyMclWMLESGk,1073
29
29
  dataflow/models/user_environment.py,sha256=yI9NutULcLiwlycuEin6ROe6o1Sjdv_sgw2MEkJFeYg,568
30
30
  dataflow/models/user_team.py,sha256=r_fmKvf6JuGgiiI9TXWjVG2QZ3WOvDrOwYWVQ3r8oWo,659
31
31
  dataflow/models/variables.py,sha256=Sinvv3zFYni5i_GrL69cVfhCh4tOOaIHiEzWYRJ-i10,1132
32
- dataflow/scripts/clone_environment.sh,sha256=PJfMWPgiWSkvY-x98WmjeAkRREjC9824JzTazHe2iQQ,390
33
- dataflow/scripts/create_environment.sh,sha256=ams50MD1r53cHRYDfpAvmEAMsCaCFvlL0cmnRVXhFgY,496
32
+ dataflow/scripts/clone_environment.sh,sha256=dVs-NAGHtpYsk-OjoZ_gbYuZZoi3jIbEp_zXbDXEVbc,455
33
+ dataflow/scripts/create_environment.sh,sha256=TLJ7FKYyhsLe0bqBy74FnpuvjFTSHiGXp1iLMcOaeJA,798
34
+ dataflow/scripts/update_environment.sh,sha256=p8r2qV4blqLyC7eksHSkUDoXx_UL4Xc4NWmx8y0h_rc,962
34
35
  dataflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
36
  dataflow/utils/aws_secrets_manager.py,sha256=A_fNs9VNah9dDdl9NhqizJamYU7xr2v_GXlw9InEDFk,2380
36
37
  dataflow/utils/get_current_user.py,sha256=akjcUyTpmMdAZj9LFGSTs76hjBRjltNk9hLUqC_BdkA,1140
37
38
  dataflow/utils/json_handler.py,sha256=5_7WdypegRBDe2HSqBXyrJAdd92wsha8qRcmQvCj1TA,782
38
39
  dataflow/utils/logger.py,sha256=7BFrOq5Oiqn8P4XZbgJzMP5O07d2fpdECbbfsjrUuHw,1213
39
- dataflow_core-2.1.2.dist-info/METADATA,sha256=4eTzui9zX33CbX6cQdj1p2uzZIq6OMD-ex_mAWzEY2E,301
40
- dataflow_core-2.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
- dataflow_core-2.1.2.dist-info/entry_points.txt,sha256=ppj_EIbYrJJwCPg1kfdsZk5q1N-Ejfis1neYrnjhO8o,117
42
- dataflow_core-2.1.2.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
43
- dataflow_core-2.1.2.dist-info/RECORD,,
40
+ dataflow_core-2.1.3.dist-info/METADATA,sha256=dRhOgxGmyMCLl9eJQ0XfExl5Fp0BYZzdJfYGgvVS3Mo,301
41
+ dataflow_core-2.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
42
+ dataflow_core-2.1.3.dist-info/entry_points.txt,sha256=ppj_EIbYrJJwCPg1kfdsZk5q1N-Ejfis1neYrnjhO8o,117
43
+ dataflow_core-2.1.3.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
44
+ dataflow_core-2.1.3.dist-info/RECORD,,