dataflow-core 2.1.2__py3-none-any.whl → 2.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dataflow-core might be problematic.
- dataflow/environment.py +319 -133
- dataflow/models/environment.py +5 -6
- dataflow/models/environment_status.py +1 -1
- dataflow/scripts/clone_environment.sh +8 -4
- dataflow/scripts/create_environment.sh +23 -10
- dataflow/scripts/update_environment.sh +37 -0
- {dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/METADATA +1 -1
- {dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/RECORD +11 -10
- {dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/WHEEL +0 -0
- {dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/entry_points.txt +0 -0
- {dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/top_level.txt +0 -0
dataflow/environment.py
CHANGED
@@ -1,18 +1,21 @@
-import os, shutil, subprocess, datetime
+import os, shutil, subprocess, datetime, yaml, re
 from .models.environment import JobLogs, Environment
 import json, asyncio, pkg_resources
 from sqlalchemy.orm import Session
 from .configuration import ConfigurationManager
+from .utils.logger import CustomLogger

 class EnvironmentManager:
     def __init__(self):
         """Initialize the EnvironmentManager"""
         self.config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
-        self.
-        self.draft_env_path = self.config.get_config_value('paths', 'drafts_env_path')
+        self.env_base_path = self.config.get_config_value('paths', 'env_path')
         self.env_logs_path = self.config.get_config_value('paths', 'env_logs_path')
+        self.env_version_path = self.config.get_config_value('paths', 'env_versions_path')
+        os.makedirs(self.env_version_path, exist_ok=True)
+        self.logger = CustomLogger().get_logger(__name__)

-    async def create_env(self, env_name, py_version,
+    async def create_env(self, env_name, py_version, pip_libraries, conda_libraries, status, env_version='1', user_name=None, db:Session=None):
         """
         Creates a conda environment with specified Python version and packages.

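For context, the three path keys the rewritten `__init__` reads all come from `/dataflow/app/config/dataflow.cfg`. A minimal sketch of how such a `[paths]` section could look and be read with the standard library — only the key names are taken from the diff; the section layout and values are assumptions, and `ConfigurationManager` itself is not part of this diff:

import configparser

# Hypothetical dataflow.cfg contents; values are illustrative.
cfg_text = """
[paths]
env_path = /dataflow/envs
env_logs_path = /dataflow/envs/logs
env_versions_path = /dataflow/envs/versions
"""

config = configparser.ConfigParser()
config.read_string(cfg_text)

# Equivalent of self.config.get_config_value('paths', 'env_versions_path')
print(config.get("paths", "env_versions_path"))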
@@ -26,47 +29,56 @@ class EnvironmentManager:
             db (Session): Database session (optional, will create if None)

         Returns:
-            str: Build status ('success' or '
+            str: Build status ('success' or 'failed')
         """
         # Set up logging
         log_file_location = None
         if db:
             log_file_location = self._setup_logging(env_name, env_version, user_name, db)

+        # Create the conda environment YAML file
+        yaml_path = os.path.join(self.env_version_path, f"{env_name}_v{env_version}.yaml")
+        self.create_conda_yaml(
+            yaml_path=yaml_path,
+            env_name=env_name,
+            python_version=py_version,
+            conda_packages=conda_libraries,
+            pip_packages=pip_libraries
+        )
+
         if status == "published":
             return await self._execute_env_operation(
                 env_name=env_name,
-                py_version=py_version,
-                py_requirements=py_requirements,
                 status="published",
-
-
+                mode="create",
+                yaml_file_path=yaml_path,
+                version=int(env_version)
             )
         elif status == "draft":
-
+            mode = "create" if env_version == '1' else "update"
             build_status = await self._execute_env_operation(
                 env_name=env_name,
-                py_version=py_version,
-                py_requirements=py_requirements,
                 status=status,
-
+                mode=mode,
+                yaml_file_path=yaml_path,
                 log_file_location=log_file_location,
-
+                version=int(env_version)
             )

             # Update job log status if db was provided
             if db and log_file_location:
                 log_file_name = os.path.basename(log_file_location)
                 await self._update_job_status(log_file_name, build_status, log_file_location, db)
-
-                self.update_environment_db(env_name, env_version,
+                pip_libraries, conda_libraries = self.update_library_versions(yaml_path)
+                self.update_environment_db(env_name, env_version, pip_libraries, conda_libraries, build_status, db)

             return build_status

         else:
+            self.logger.error(f"Invalid status '{status}' provided for environment creation.")
             raise ValueError("Invalid status. Use 'draft' or 'published'.")

-    async def clone_env(self, source_path,
+    async def clone_env(self, source_path, env_name, pip_libraries, conda_libraries, user_name, db=None):
         """
         Clones an existing conda environment.

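Taken together, the new create_env flow is: write `{env_name}_v{env_version}.yaml` into the versions directory via `create_conda_yaml`, build or update the prefix from that file, then read the pinned versions back with `update_library_versions` and persist them. A hedged usage sketch of the new signature — argument values are illustrative, and a real call would normally pass a SQLAlchemy `Session` so job logs and the environment row get updated:

import asyncio
from dataflow.environment import EnvironmentManager

async def main():
    manager = EnvironmentManager()
    status = await manager.create_env(
        env_name="demo",
        py_version="3.11",
        pip_libraries=["requests==2.31.0"],
        conda_libraries=["numpy=1.26"],
        status="draft",          # 'draft' or 'published'
        env_version="1",         # '1' maps to mode='create'; later versions to 'update'
        user_name="alice",       # hypothetical user
        db=None,                 # optional Session; enables job logging and DB updates
    )
    print(status)                # 'success' or 'failed'

asyncio.run(main())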
@@ -79,56 +91,95 @@ class EnvironmentManager:
             db (Session): Database session (optional, will create if None)

         Returns:
-            str: Build status ('success' or '
+            str: Build status ('success' or 'failed')
         """
         # Set up logging
         log_file_location = None
         if db:
-            log_file_location = self._setup_logging(
+            log_file_location = self._setup_logging(env_name, "1", user_name, db)
+
+        yaml_path = f"{self.env_version_path}/{env_name}_v1.yaml"

         # Perform the clone operation
         clone_status = await self._execute_env_operation(
-            env_name=
-            source_path=source_path,
+            env_name=env_name,
             status="draft",
-
+            mode="clone",
+            yaml_file_path=yaml_path,
+            source_path=source_path,
             log_file_location=log_file_location,
-
+            version=1
         )

         # Update job log status if db was provided
         if db and log_file_location:
             log_file_name = os.path.basename(log_file_location)
             await self._update_job_status(log_file_name, clone_status, log_file_location, db)
-            self.update_environment_db(
+            self.update_environment_db(
+                env_short_name=env_name,
+                version="1",
+                pip_libraries=pip_libraries,
+                conda_libraries=conda_libraries,
+                status=clone_status,
+                db=db
+            )

         return clone_status

-    async def
+    async def revert_env(self, env_name, curr_version, revert_version, new_version, user_name, db: Session):
         """
-
+        Reverts an environment to a previous version.

         Args:
             env_name (str): Name of the environment
-
-
-            status (str): Environment status ('draft' or 'published')
-            env_version (str): Version of the environment (for draft environments)
-            user_name (str): Username who initiated the creation
+            version (str): Version to revert to
+            db (Session): Database session

         Returns:
-            str: Build status ('success' or '
+            str: Build status ('success' or 'failed')
         """
-
-
-
-
-
-
-
-
+        try:
+            # Get the YAML file for the specified version
+            old_yaml_path = f"{self.env_version_path}/{env_name}_v{revert_version}.yaml"
+            new_yaml_path = f"{self.env_version_path}/{env_name}_v{new_version}.yaml"
+            if not os.path.exists(old_yaml_path):
+                raise FileNotFoundError(f"YAML file for version {revert_version} does not exist.")
+
+            os.symlink(old_yaml_path, new_yaml_path)
+
+            log_file_location = None
+            if db:
+                log_file_location = self._setup_logging(env_name, new_version, user_name, db)
+
+            # Execute the revert operation
+            revert_status = await self._execute_env_operation(
+                env_name=env_name,
+                status="draft",
+                mode="update",
+                yaml_file_path=new_yaml_path,
+                log_file_location=log_file_location,
+                version=int(new_version)
+            )
+
+            log_file_name = os.path.basename(log_file_location)
+            await self._update_job_status(log_file_name, revert_status, log_file_location, db)
+            pip_libraries, conda_libraries = self.update_library_versions(new_yaml_path)
+            self.update_environment_db(env_name, new_version, pip_libraries, conda_libraries, revert_status, db)

-
+        except Exception as e:
+            self.logger.error(f"Failed to revert environment {env_name}: {e}")
+            return "failed"
+
+    async def _execute_env_operation(
+        self,
+        env_name: str,
+        status: str,
+        mode: str,
+        yaml_file_path: str,
+        version: int,
+        source_path=None,
+        log_file_location=None,
+    ):
         """
         Executes environment operations (create or clone).

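revert_env is new in this release and leans on the versioned YAML files rather than copying anything: the spec of the target version is symlinked in as the next version number, and the environment is rebuilt through the 'update' path. The new update_environment.sh then skips its re-export step for symlinked specs (the `[ ! -L ... ]` guard), so the linked file keeps pointing at the original version's pins. A minimal sketch of just the symlink bookkeeping, under the naming scheme used above (paths and specs are illustrative):

import os
import tempfile
from pathlib import Path

versions = Path(tempfile.mkdtemp())  # stand-in for env_versions_path
(versions / "demo_v1.yaml").write_text("name: demo\n")   # illustrative specs
(versions / "demo_v2.yaml").write_text("name: demo\n")

# Reverting to v1 as new version 3: v3 is just a symlink to the v1 spec,
# so rebuilding v3 re-applies exactly the v1 package set.
os.symlink(versions / "demo_v1.yaml", versions / "demo_v3.yaml")

assert os.path.islink(versions / "demo_v3.yaml")  # what the update script tests
print((versions / "demo_v3.yaml").read_text())    # resolves to the v1 spec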
@@ -143,34 +194,32 @@ class EnvironmentManager:
             log_file_location (str): Path to log file

         Returns:
-            str: Build status ('success' or '
+            str: Build status ('success' or 'failed')
         """
+        self.logger.info(f"Executing environment operation: {env_name}, Status: {status}, Mode: {mode}")
         status = status.lower()
-
-            env_base_path = self.config.get_config_value('paths', 'published_env_path')
-            conda_env_path = os.path.join(env_base_path, env_name)
-        else:
-            env_base_path = self.config.get_config_value('paths', 'drafts_env_path')
-            conda_env_path = os.path.join(env_base_path, env_name, f"{env_name}_v{env_version}")
+        conda_env_path = os.path.join(self.env_base_path, env_name)

         try:
-            if
-
+            if os.path.exists(conda_env_path) and mode == "create":
+                raise FileExistsError(f"Environment '{env_name}' already exists at {conda_env_path}.")
+
+            os.makedirs(conda_env_path, exist_ok=True)

-            if mode == "create":
-                # Convert requirements list to comma-separated string
-                if isinstance(py_requirements, list):
-                    py_requirements = ",".join(py_requirements)
-
+            if mode == "create":
                 create_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
-                command = ["bash", create_env_script_path,
-
+                command = ["bash", create_env_script_path, yaml_file_path, conda_env_path]
+
+            elif mode == "update":
+                update_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/update_environment.sh')
+                command = ["bash", update_env_script_path, yaml_file_path, conda_env_path]
+
             elif mode == "clone":
                 clone_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/clone_environment.sh')
                 command = ["bash", clone_env_script_path, source_path, conda_env_path]

             else:
-                raise ValueError("Invalid mode. Use 'create' or 'clone'.")
+                raise ValueError("Invalid mode. Use 'create', 'update', or 'clone'.")

             process = await asyncio.create_subprocess_exec(
                 *command,
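Each mode now maps one-to-one onto a shell script shipped inside the wheel and resolved with pkg_resources; create and update take the YAML path plus the target prefix, while clone takes a source environment path instead of a YAML. A condensed sketch of the dispatch for the two YAML-driven modes — the function name and the stream wiring are assumptions, not the package's exact code:

import asyncio
import pkg_resources

async def run_env_script(mode: str, yaml_file_path: str, conda_env_path: str) -> int:
    # mode is 'create' or 'update'; clone passes (source_path, conda_env_path) instead
    script = pkg_resources.resource_filename("dataflow", f"scripts/{mode}_environment.sh")
    process = await asyncio.create_subprocess_exec(
        "bash", script, yaml_file_path, conda_env_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
    )
    await process.wait()
    return process.returncode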
@@ -184,6 +233,16 @@ class EnvironmentManager:
             with open(log_file_location, "a") as log_file:
                 success_detected = False
                 try:
+                    # Write an initial log entry to indicate the operation has started
+                    start_message = {
+                        "timestamp": self.format_timestamp(),
+                        "type": "log",
+                        "content": f"Starting environment {mode} operation for {env_name}"
+                    }
+                    log_file.write(json.dumps(start_message) + "\n")
+                    log_file.flush()
+
+                    # Process stdout line by line
                     while True:
                         line = await process.stdout.readline()
                         if not line:
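Build logs are JSON-lines files, one object with "timestamp", "type", and "content" fields per line; that is the shape of the start marker the new code writes before streaming stdout. A small sketch of writing and tailing that format (the file name is illustrative):

import datetime
import json

entry = {
    "timestamp": datetime.datetime.now().strftime("%b %d %I:%M:%S %p"),
    "type": "log",
    "content": "Starting environment create operation for demo",
}

with open("build.log", "a") as log_file:
    log_file.write(json.dumps(entry) + "\n")  # one self-contained JSON object per line
    log_file.flush()

# Each line parses independently, so a UI can tail the file as it grows:
with open("build.log") as log_file:
    for line in log_file:
        print(json.loads(line)["content"])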
@@ -213,7 +272,7 @@ class EnvironmentManager:
                         }
                         log_file.write(json.dumps(error_message_dict) + "\n")

-                    final_build_status = "
+                    final_build_status = "failed" if process.returncode != 0 else "success"

                 except asyncio.CancelledError:
                     process.kill()
@@ -224,27 +283,24 @@ class EnvironmentManager:
                         "content": msg_content
                     }
                     log_file.write(json.dumps(cancellation_message) + "\n")
-                    final_build_status = "
+                    final_build_status = "failed"

                 finally:
-                    if final_build_status
-                        symlink_path = os.path.join(env_base_path, env_name, "default")
-                        self.update_symlink(symlink_path, conda_env_path)
-                    elif final_build_status != "success":
+                    if final_build_status != "success" and version == 1:
                         if os.path.exists(conda_env_path):
                             shutil.rmtree(conda_env_path)

                 return final_build_status

         except OSError as e:
-
-            return "
+            self.logger.error(f"OS error while operating on {conda_env_path}: {e}")
+            return "failed"
         except subprocess.CalledProcessError as e:
-
-            return "
+            self.logger.error(f"Subprocess error during environment operation: {e}")
+            return "failed"
         except Exception as e:
-
-            return "
+            self.logger.error(f"Unexpected error during environment operation for {env_name}: {e}")
+            return "failed"

     def _setup_logging(self, env_name: str, env_version: str, user_name: str, db: Session):
         """
@@ -281,7 +337,7 @@ class EnvironmentManager:
         Args:
             db (Session): Database session
             log_file_name (str): Name of the log file
-            build_status (str): Build status ('success' or '
+            build_status (str): Build status ('success' or 'failed')
             log_file_location (str): Path to the log file
         """
         attempts = 3
@@ -306,7 +362,7 @@ class EnvironmentManager:
                 if attempts > 0:
                     await asyncio.sleep(retry_delay)
                 else:
-
+                    self.logger.error(f"Failed to update job log after multiple attempts: {e}")

     def create_job_entry(self, user_name: str, db: Session, log_file_name: str, log_file_location: str):
         """
@@ -325,8 +381,9 @@ class EnvironmentManager:

         if job:
             if job.status == "success":
+                self.logger.error(f"Job with log_file_name '{log_file_name}' already completed successfully.")
                 raise ValueError(f"Job with log_file_name '{log_file_name}' already completed successfully.")
-            if job.status == "
+            if job.status == "failed":
                 job.created_at = datetime.datetime.now()
                 job.status = "in_progress"
         else:
@@ -349,7 +406,7 @@ class EnvironmentManager:
         Args:
             db (Session): Database session
             log_file_name (str): Name of the log file
-            final_build_status (str): Final status of the build ('success' or '
+            final_build_status (str): Final status of the build ('success' or 'failed')
         """
         try:
             job_record = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()
@@ -358,24 +415,12 @@ class EnvironmentManager:
                 job_record.status = final_build_status
                 db.commit()
             else:
+                self.logger.error(f"No job log found for file: {log_file_name}")
                 raise ValueError(f"No job log found for file: {log_file_name}")
         except Exception as e:
+            self.logger.error(f"Failed to update job log for {log_file_name}: {e}")
             db.rollback()
             raise
-
-    def update_symlink(self, symlink_path, conda_env_path):
-        """
-        Creates or updates the symlink to point to the default version.
-        """
-        symlink_dir = os.path.dirname(symlink_path)
-        if not os.path.exists(symlink_dir):
-            os.makedirs(symlink_dir, exist_ok=True)
-
-        # If symlink exists, remove it before updating
-        if os.path.islink(symlink_path):
-            os.remove(symlink_path)
-
-        subprocess.run(["ln", "-sf", conda_env_path, symlink_path], check=True)

     def format_timestamp(self):
         """
@@ -386,73 +431,214 @@ class EnvironmentManager:
         """
         return datetime.datetime.now().strftime("%b %d %I:%M:%S %p")

-    def update_environment_db(self, env_short_name, version,
+    def update_environment_db(self, env_short_name, version, pip_libraries, conda_libraries, status, db: Session):
         """
         Updates the environment table with the new version and libraries.
         """
         try:
-            if isinstance(
-
+            if isinstance(pip_libraries, list):
+                pip_libraries = ", ".join(pip_libraries)
+            if isinstance(conda_libraries, list):
+                conda_libraries = ", ".join(conda_libraries)
             current_env = db.query(Environment).filter(Environment.short_name == env_short_name).first()
-
-
+            if not current_env:
+                raise ValueError(f"Environment with short name '{env_short_name}' does not exist.")
+
+            env_status = "Draft" if status == "success" else "Failed"
+
+            db.query(Environment).filter(
+                Environment.short_name == env_short_name
+            ).update({"version": version, "pip_libraries": pip_libraries, "conda_libraries": conda_libraries, "status": env_status})
             db.commit()

         except Exception as e:
+            self.logger.error(f"Failed to update environment {env_short_name} in database: {e}")
             db.rollback()
             raise

-
-    def update_library_versions(self, libraries: list, conda_env_path: str) -> list:
+    def update_library_versions(self, yaml_path: str):
         """
-        Updates libraries without version specifications by getting their actual installed versions.
+        Updates libraries without version specifications by getting their actual installed versions from a conda YAML file.

         Args:
-
-            conda_env_path (str): Path to the conda environment where libraries are installed.
+            yaml_path (str): Path to the conda environment YAML file.

         Returns:
-
+            tuple: Updated lists of (pip_libraries, conda_libraries) with version specifications.
         """
         try:
-
-
-
-
-
-
-
-
+            # Define default conda packages to ignore
+            default_conda_packages = {
+                "_libgcc_mutex", "_openmp_mutex", "bzip2", "ca-certificates",
+                "ld_impl_linux-64", "libexpat", "libffi", "libgcc", "libgcc-ng",
+                "libgomp", "liblzma", "libnsl", "libsqlite", "libuuid", "libxcrypt",
+                "libzlib", "ncurses", "openssl", "readline", "setuptools", "tk",
+                "tzdata", "wheel", "libstdcxx-ng", "python"
+            }

-
-
-
-                    lib_name, version = line.split("==", 1)
-                    installed_versions[lib_name.lower()] = version
+            # Read the YAML file
+            with open(yaml_path, 'r') as f:
+                yaml_content = yaml.safe_load(f)

-            #
-
-
-
-
-
-
-
-
+            # Extract conda and pip dependencies
+            dependencies = yaml_content.get('dependencies', [])
+
+            # Process conda libraries
+            conda_libraries = []
+            pip_libraries = []
+
+            for dep in dependencies:
+                if isinstance(dep, str):
+                    if dep.startswith("python="):
+                        continue

-
-
-                    else:
-                        updated_libraries.append(lib)
-                else:
-                    updated_libraries.append(lib)
+                    parts = dep.split('=')
+                    package_name = parts[0].strip()

-
+                    if package_name.lower() not in default_conda_packages:
+                        if len(parts) >= 2:
+                            package_with_version = f"{package_name}={parts[1]}"
+                            conda_libraries.append(package_with_version)
+                        else:
+                            # No version specified, keep as is
+                            conda_libraries.append(dep)
+
+                elif isinstance(dep, dict) and 'pip' in dep:
+                    # This is the pip section
+                    for pip_pkg in dep['pip']:
+                        pip_libraries.append(pip_pkg)
+
+            return pip_libraries, conda_libraries
+
+        except Exception as e:
+            self.logger.error(f"Error reading YAML file and extracting libraries: {str(e)}")
+            return [], []

         except subprocess.CalledProcessError as e:
-
-            return
+            self.logger.error(f"Error running pip freeze: {e.stderr}")
+            return pip_libraries
+        except Exception as e:
+            self.logger.error(f"Error updating library versions: {str(e)}")
+            return pip_libraries
+
+    def create_conda_yaml(self, yaml_path, env_name, python_version, conda_packages, pip_packages):
+        """
+        Creates a conda environment YAML file with specified packages and channels.
+
+        Args:
+            yaml_path (str): Path where to save the YAML file
+            env_name (str): Name of the conda environment
+            python_version (str): Python version to use
+            conda_channels (list): List of conda channels
+            conda_packages (list): List of conda packages to install
+            pip_packages (list): List of pip packages to install
+        """
+        try:
+            # Create the environment specification
+            env_spec = {
+                "name": env_name,
+                "channels": ["conda-forge", "defaults"],
+                "dependencies": [
+                    f"python={python_version}"
+                ]
+            }
+
+            # Add conda packages
+            if conda_packages and len(conda_packages) > 0:
+                env_spec["dependencies"].extend(conda_packages)
+
+            pip_pattern = re.compile(r"^pip([=]{1,2}.*)?$") # matches pip, pip=..., pip==...
+            pip_found = any(pip_pattern.match(pkg.strip()) for pkg in conda_packages)
+
+            # if pip is not already included in conda packages, add it
+            if not pip_found:
+                env_spec["dependencies"].append("pip")
+
+            # Add pip packages if any
+            if pip_packages and len(pip_packages) > 0:
+                pip_section = {
+                    "pip": pip_packages
+                }
+                env_spec["dependencies"].append(pip_section)
+
+            with open(yaml_path, 'w') as yaml_file:
+                yaml.dump(env_spec, yaml_file, default_flow_style=False)
+
+            return yaml_path
+
         except Exception as e:
-
-
+            self.logger.error(f"Failed to create conda environment YAML file: {str(e)}")
+            raise Exception(f"Failed to create conda environment YAML file: {str(e)}")
+
+    def format_py_requirements(self, env):
+        """
+        Format pip and conda libraries into a standardized list of dictionaries
+        sorted alphabetically by library name.
+
+        Args:
+            env: Environment object containing pip_libraries and conda_libraries strings
+
+        Returns:
+            list: List of dictionaries with format [{"name":"lib_name", "version":"version", "manager":"pip|conda"}, ...]
+        """
+        py_requirements = []
+
+        # process libraries, handle both '==' and '=' version specifications
+        if env.pip_libraries:
+            for lib in env.pip_libraries.split(','):
+                lib = lib.strip()
+                if not lib:
+                    continue
+
+                if '==' in lib:
+                    name, version = lib.split('==', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "pip"
+                    })
+                elif '=' in lib:
+                    name, version = lib.split('=', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "pip"
+                    })
+                else:
+                    py_requirements.append({
+                        "name": lib,
+                        "version": "",
+                        "manager": "pip"
+                    })
+
+        if env.conda_libraries:
+            for lib in env.conda_libraries.split(','):
+                lib = lib.strip()
+                if not lib:
+                    continue
+
+                if '==' in lib:
+                    name, version = lib.split('==', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "conda"
+                    })
+                elif '=' in lib:
+                    name, version = lib.split('=', 1)
+                    py_requirements.append({
+                        "name": name.strip(),
+                        "version": version.strip(),
+                        "manager": "conda"
+                    })
+                else:
+                    py_requirements.append({
+                        "name": lib,
+                        "version": "",
+                        "manager": "conda"
+                    })
+
+        # sort the requirements list alphabetically by name
+        py_requirements.sort(key=lambda x: x["name"].lower())
+
+        return py_requirements
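The three new methods close the loop around the YAML file: create_conda_yaml writes the requested spec, the shell scripts let conda resolve it and export the pinned result back over the same path, and update_library_versions reads that file back into separate pip and conda lists (dropping `python=` and a fixed set of base packages such as setuptools and openssl). format_py_requirements then re-expands the comma-joined DB columns into {"name", "version", "manager"} dicts sorted by name. A standalone worked example of the round trip, with an illustrative spec and a trimmed-down stand-in for the base-package set:

import yaml

spec = {
    "name": "demo",
    "channels": ["conda-forge", "defaults"],
    "dependencies": [
        "python=3.11",
        "numpy=1.26",                    # conda package
        "pip",                           # appended when no pip spec is present
        {"pip": ["requests==2.31.0"]},   # pip section
    ],
}
print(yaml.dump(spec, default_flow_style=False))  # shape of what create_conda_yaml writes

# Trimmed stand-in for the ~25-entry default_conda_packages set in the diff.
BASE = {"python", "setuptools", "wheel"}

pip_libraries, conda_libraries = [], []
for dep in spec["dependencies"]:
    if isinstance(dep, str):
        if dep.startswith("python="):
            continue
        if dep.split("=")[0].strip().lower() not in BASE:
            conda_libraries.append(dep)
    elif isinstance(dep, dict) and "pip" in dep:
        pip_libraries.extend(dep["pip"])

print(pip_libraries)    # ['requests==2.31.0']
print(conda_libraries)  # ['numpy=1.26', 'pip']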
dataflow/models/environment.py
CHANGED
@@ -15,22 +15,21 @@ class EnvironmentAttributes(Base):
     enabled = Column(Boolean, default=True)
     version = Column(String, default=0)
     is_latest = Column(Boolean, default=True)
-
+    base_env_id = Column(Integer, default=None)
     short_name = Column(String(5))
     status = Column(String, default="Saved")
     icon = Column(String)
     py_version = Column(String)
     r_version = Column(String)
-
+    pip_libraries = Column(Text)
+    conda_libraries = Column(Text)
     r_requirements = Column(Text)
-    py_requirements_compiled = Column(Text)
-    r_requirements_compiled = Column(Text)
     created_date = Column(DateTime, server_default=func.now())
     created_by = Column(String)



-class Environment(EnvironmentAttributes):
+class Environment(EnvironmentAttributes):
     __tablename__ = 'ENVIRONMENT'

     id = Column(Integer, primary_key=True, autoincrement=True)

@@ -43,7 +42,7 @@ class ArchivedEnvironment(EnvironmentAttributes):
     __tablename__ = 'ARCHIVED_ENVIRONMENT'

     id = Column(Integer, primary_key=True, autoincrement=True)
-    original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id'))
+    original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'))
     is_latest = Column(Boolean, default=False)

     # Relationship with Environment
dataflow/models/environment_status.py
CHANGED

@@ -10,7 +10,7 @@ class EnvironmentStatus(Base):

     __tablename__='ENVIRONMENT_STATUS'

-    id = Column(Integer, ForeignKey('ENVIRONMENT.id'), primary_key=True, nullable=False)
+    id = Column(Integer, ForeignKey('ENVIRONMENT.id', ondelete='CASCADE'), primary_key=True, nullable=False)
     status = Column(String, nullable=False)
     comment = Column(String)
     status_changed_date = Column(DateTime, server_default=func.now(), nullable=False)
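Both model changes add ondelete='CASCADE' to foreign keys pointing at ENVIRONMENT.id. That option is rendered into the FOREIGN KEY ... ON DELETE CASCADE DDL, so dependent status and archive rows follow a deleted environment at the database level (the ORM-side cascade= setting on relationships is a separate mechanism). A minimal self-contained sketch with abbreviated tables; SQLite is used here only to make the DDL visible, and it additionally needs PRAGMA foreign_keys=ON to enforce the cascade:

from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Environment(Base):
    __tablename__ = "ENVIRONMENT"
    id = Column(Integer, primary_key=True, autoincrement=True)
    short_name = Column(String(5))

class EnvironmentStatus(Base):
    __tablename__ = "ENVIRONMENT_STATUS"
    # Mirrors the changed column: deleting the parent row deletes this one.
    id = Column(Integer, ForeignKey("ENVIRONMENT.id", ondelete="CASCADE"),
                primary_key=True, nullable=False)
    status = Column(String, nullable=False)

engine = create_engine("sqlite://", echo=True)
Base.metadata.create_all(engine)  # emitted DDL includes ON DELETE CASCADE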
dataflow/scripts/clone_environment.sh
CHANGED

@@ -4,12 +4,16 @@ set -e
 source_env_name=$1
 target_env_path=$2

-#
-
-
-
+# Extract just the env name (basename) from the target path
+env_name=$(basename "$target_env_path")
+
+# Set unique cache dir per environment
+export CONDA_PKGS_DIRS="/dataflow/envs/cache/${env_name}"
+mkdir -p "$CONDA_PKGS_DIRS"

 # 1. Cloning conda env
 conda create --clone ${source_env_name} --prefix ${target_env_path} --yes

+conda env export --prefix "$conda_env_path" > "$yaml_file_path"
+
 echo "Environment Creation Successful"
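All three scripts now export CONDA_PKGS_DIRS to a per-environment directory under /dataflow/envs/cache, so concurrent builds stop contending for one shared package cache. One caveat worth flagging in this release: the conda env export line added to clone_environment.sh references $conda_env_path and $yaml_file_path, but that script's positional parameters are source_env_name and target_env_path, so both variables appear to be unset as shipped. A caller-side sketch of the same cache isolation — the helper name is hypothetical, and the paths mirror the scripts:

import os
import subprocess

def run_with_isolated_cache(script: str, *args: str) -> None:
    env_name = os.path.basename(args[-1])           # scripts take the prefix last
    cache_dir = f"/dataflow/envs/cache/{env_name}"  # same layout as CONDA_PKGS_DIRS
    os.makedirs(cache_dir, exist_ok=True)
    env = dict(os.environ, CONDA_PKGS_DIRS=cache_dir)
    subprocess.run(["bash", script, *args], env=env, check=True)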
dataflow/scripts/create_environment.sh
CHANGED

@@ -1,17 +1,30 @@
 #!/bin/bash
+# filepath: /home/hari/dbo/dataflow-core/dataflow/scripts/create_environment.sh
 set -e

-
+# Accept new parameters
+yaml_file_path=$1
 conda_env_path=$2
-py_version=$3

-#
-
-
-
+# Validate inputs
+if [ -z "$yaml_file_path" ] || [ -z "$conda_env_path" ]; then
+    echo "Error: Missing required parameters"
+    exit 1
+fi

-
-
+if [ ! -f "$yaml_file_path" ]; then
+    echo "Error: YAML file does not exist: $yaml_file_path"
+    exit 1
+fi

-#
-$
+# Extract just the env name (basename) from the target path
+env_name=$(basename "$conda_env_path")
+
+# Set unique cache dir per environment
+export CONDA_PKGS_DIRS="/dataflow/envs/cache/${env_name}"
+mkdir -p "$CONDA_PKGS_DIRS"
+
+# Create the conda environment from the YAML file
+conda env create --file "$yaml_file_path" --prefix "$conda_env_path" --yes
+
+conda env export --prefix "$conda_env_path" > "$yaml_file_path"
dataflow/scripts/update_environment.sh
ADDED

@@ -0,0 +1,37 @@
+#!/bin/bash
+# filepath: /home/hari/dbo/dataflow-core/dataflow/scripts/update_environment.sh
+set -e
+
+# Accept parameters
+yaml_file_path=$1
+conda_env_path=$2
+
+# Validate inputs
+if [ -z "$yaml_file_path" ] || [ -z "$conda_env_path" ]; then
+    echo "Error: Missing required parameters"
+    exit 1
+fi
+
+if [ ! -f "$yaml_file_path" ]; then
+    echo "Error: YAML file does not exist: $yaml_file_path"
+    exit 1
+fi
+
+if [ ! -d "$conda_env_path" ]; then
+    echo "Error: Conda environment does not exist at: $conda_env_path"
+    exit 1
+fi
+
+# Extract just the env name (basename) from the target path
+env_name=$(basename "$conda_env_path")
+
+# Set unique cache dir per environment
+export CONDA_PKGS_DIRS="/dataflow/envs/cache/${env_name}"
+mkdir -p "$CONDA_PKGS_DIRS"
+
+# Update the conda environment using the YAML file
+conda env update --prefix "$conda_env_path" --file "$yaml_file_path" --prune
+
+if [ ! -L "$yaml_file_path" ]; then
+    conda env export --prefix "$conda_env_path" > "$yaml_file_path"
+fi
{dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/RECORD
CHANGED

@@ -7,13 +7,13 @@ dataflow/configuration.py,sha256=7To6XwH1eESiYp39eqPcswXWwrdBUdPF6xN6WnazOF0,663
 dataflow/database_manager.py,sha256=tJHMuOZ9Muskrh9t4uLRlTuFU0VkHAzoHlGP5DORIC4,899
 dataflow/dataflow.py,sha256=-UYZst7EO1GgaOjlAkKu-tu7RC6XsgadGeDp1MOvZiA,7439
 dataflow/db.py,sha256=5UwE4w5Vn9RqFIhr8ARlu2haZX-PtHDLRPjmn5BG2m8,1649
-dataflow/environment.py,sha256=
+dataflow/environment.py,sha256=eAWYhQFM7CA3o-CA3qTWbkE8ZHZ-Jo2qvSkDQ4dKqmY,27238
 dataflow/models/__init__.py,sha256=QMLiKj8BMhfScWMm8kgHkMjwAlFeg5Cym3_AI1NvBUA,783
 dataflow/models/app_types.py,sha256=yE_ZB13lhpK7AZ7PyBwnQlf0RlIHYs_-vdMKx7_RMlY,379
 dataflow/models/blacklist_library.py,sha256=B2oi3Z8GcR_glhLAyinFk0W8c9txXvm3uOER6dY-q7I,991
 dataflow/models/connection.py,sha256=_VJL3KuIrm8t4lJmtunIL3-AXF9Yvi5wUolzdR3tE0E,1017
-dataflow/models/environment.py,sha256=
-dataflow/models/environment_status.py,sha256=
+dataflow/models/environment.py,sha256=Vg-4vQe_cHIfOX-kPJWd6SIEapPJcgxoCITgdRrqT_o,2107
+dataflow/models/environment_status.py,sha256=lvPDNUsUoTW9D97B07aKqJQHRKp4LvPM28pQDMPH1ac,536
 dataflow/models/git_ssh.py,sha256=W15SDypxzGOz_aZkHEnVZ6DIMVsjAsbSIXVIEt2mPYU,694
 dataflow/models/pinned_projects.py,sha256=rkpPX_f2U9HjmrRo7_K8rnZIeXuQKGq6hYTrtLmu21c,566
 dataflow/models/project_details.py,sha256=94wTygXv9iGB0w8g_6vtkB5ZqIzpEv1W9uWwCA4hM0Y,1078

@@ -29,15 +29,16 @@ dataflow/models/user.py,sha256=PT-zwZj7NWUubIj_7EY2EsjduMbI_42EyMclWMLESGk,1073
 dataflow/models/user_environment.py,sha256=yI9NutULcLiwlycuEin6ROe6o1Sjdv_sgw2MEkJFeYg,568
 dataflow/models/user_team.py,sha256=r_fmKvf6JuGgiiI9TXWjVG2QZ3WOvDrOwYWVQ3r8oWo,659
 dataflow/models/variables.py,sha256=Sinvv3zFYni5i_GrL69cVfhCh4tOOaIHiEzWYRJ-i10,1132
-dataflow/scripts/clone_environment.sh,sha256=
-dataflow/scripts/create_environment.sh,sha256=
+dataflow/scripts/clone_environment.sh,sha256=xWJBw9z1W1rztrzLXYro3UtEdFuBSqNrB83y45zqFfE,487
+dataflow/scripts/create_environment.sh,sha256=3FHgNplJuEZvyTsLqlCJNX9oyfXgsfqn80VZk2xtvso,828
+dataflow/scripts/update_environment.sh,sha256=2dtn2xlNi6frpig-sqlGE1_IKRbbkqYOCpf_qyMKKII,992
 dataflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataflow/utils/aws_secrets_manager.py,sha256=A_fNs9VNah9dDdl9NhqizJamYU7xr2v_GXlw9InEDFk,2380
 dataflow/utils/get_current_user.py,sha256=akjcUyTpmMdAZj9LFGSTs76hjBRjltNk9hLUqC_BdkA,1140
 dataflow/utils/json_handler.py,sha256=5_7WdypegRBDe2HSqBXyrJAdd92wsha8qRcmQvCj1TA,782
 dataflow/utils/logger.py,sha256=7BFrOq5Oiqn8P4XZbgJzMP5O07d2fpdECbbfsjrUuHw,1213
-dataflow_core-2.1.
-dataflow_core-2.1.
-dataflow_core-2.1.
-dataflow_core-2.1.
-dataflow_core-2.1.
+dataflow_core-2.1.4.dist-info/METADATA,sha256=ImIZofq3kQ0DOVQxA4YPc2Tcy3Q5_bRxO_A7TzE0X7E,301
+dataflow_core-2.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dataflow_core-2.1.4.dist-info/entry_points.txt,sha256=ppj_EIbYrJJwCPg1kfdsZk5q1N-Ejfis1neYrnjhO8o,117
+dataflow_core-2.1.4.dist-info/top_level.txt,sha256=SZsUOpSCK9ntUy-3Tusxzf5A2e8ebwD8vouPb1dPt_8,23
+dataflow_core-2.1.4.dist-info/RECORD,,

{dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/WHEEL
File without changes

{dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/entry_points.txt
File without changes

{dataflow_core-2.1.2.dist-info → dataflow_core-2.1.4.dist-info}/top_level.txt
File without changes