dataflow-core 2.0.6__tar.gz → 2.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of dataflow-core might be problematic.
- dataflow_core-2.0.8/PKG-INFO +14 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/dataflowhubauthenticator.py +17 -1
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/dataflow.py +3 -41
- dataflow_core-2.0.8/dataflow/environment.py +459 -0
- dataflow_core-2.0.8/dataflow/models/environment.py +64 -0
- dataflow_core-2.0.8/dataflow/scripts/clone_environment.sh +15 -0
- dataflow_core-2.0.8/dataflow/scripts/create_environment.sh +17 -0
- dataflow_core-2.0.8/dataflow_core.egg-info/PKG-INFO +14 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow_core.egg-info/SOURCES.txt +4 -2
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow_core.egg-info/entry_points.txt +0 -1
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/setup.py +17 -2
- dataflow-core-2.0.6/PKG-INFO +0 -10
- dataflow-core-2.0.6/dataflow/models/session.py +0 -17
- dataflow-core-2.0.6/dataflow/models/user.py +0 -23
- dataflow-core-2.0.6/dataflow_core.egg-info/PKG-INFO +0 -10
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/README.md +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/__init__.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/dataflowairflowauthenticator.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/dataflowsupersetauthenticator.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/package/__init__.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/package/configuration.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/package/models/__init__.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/package/models/database.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/package/models/session.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/package/models/user.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/__init__.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/configuration.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/models/__init__.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/models/database.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/utils/__init__.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/utils/aws_secrets_manager.py +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow_core.egg-info/dependency_links.txt +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow_core.egg-info/requires.txt +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow_core.egg-info/top_level.txt +0 -0
- {dataflow-core-2.0.6 → dataflow_core-2.0.8}/setup.cfg +0 -0
dataflow_core-2.0.8/PKG-INFO

@@ -0,0 +1,14 @@
+Metadata-Version: 2.4
+Name: dataflow-core
+Version: 2.0.8
+Summary: Dataflow core package
+Author: Dataflow
+Author-email:
+Requires-Dist: sqlalchemy
+Requires-Dist: boto3
+Requires-Dist: psycopg2-binary
+Requires-Dist: pymysql
+Requires-Dist: requests
+Dynamic: author
+Dynamic: requires-dist
+Dynamic: summary
{dataflow-core-2.0.6 → dataflow_core-2.0.8}/authenticator/dataflowhubauthenticator.py

@@ -65,7 +65,23 @@ class DataflowHubAuthenticator(Authenticator):
             self.db.refresh(db_item)
 
         expires = datetime.now(timezone.utc) + timedelta(days=365)
-        handler.
+        host = handler.request.host
+        parts = host.split('.')
+        if len(parts) >= 2:
+            domain = '.'.join(parts[-2:])
+        else:
+            domain = host
+        base_domain = f".{domain}"
+        handler.set_cookie(
+            "dataflow_session",
+            session_id,
+            domain=base_domain,
+            path="/",
+            expires=expires,
+            secure=True,
+            httponly=True,
+            samesite="None"
+        )
         user_dict = {"name": username, "session_id": session_id}
         return user_dict
 
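The replacement for the truncated deletion above derives a registrable base domain from the request host so the `dataflow_session` cookie is visible to every subdomain. A minimal sketch of just that derivation, with hypothetical hostnames:

```python
# Sketch of the base-domain logic added above (hostnames are illustrative).
def base_domain(host: str) -> str:
    parts = host.split('.')
    domain = '.'.join(parts[-2:]) if len(parts) >= 2 else host
    return f".{domain}"

print(base_domain("hub.example.com"))  # ".example.com" -> cookie shared across subdomains
print(base_domain("localhost"))        # ".localhost"   -> single-label hosts pass through
```

Note that the heuristic keeps only the last two labels, so hosts under multi-part public suffixes (e.g. `something.co.uk`) would get a broader cookie scope than intended.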
{dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow/dataflow.py

@@ -1,5 +1,6 @@
-import os, requests
+import os, requests, shutil, subprocess, datetime
 from .models.database import DatabaseManager
+from .models.environment import JobLogs
 from sqlalchemy.inspection import inspect
 from .utils.aws_secrets_manager import SecretsManagerClient
 import json, asyncio, pkg_resources

@@ -85,43 +86,4 @@ class Dataflow:
             return next(connection_instance.get_session())
 
         except Exception as e:
-            return None
-
-    async def create_env(self, env_name, py_version, py_requirements, status, env_version=None):
-        """
-        Creates a conda environment at the specified path and installs libraries in one command.
-        """
-        config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
-        status = status.lower()
-        if status == "published":
-            env_base_path = config.get_config_value('paths', 'published_env_path')
-            conda_env_path = os.path.join(env_base_path, env_name)
-        else:
-            env_base_path = config.get_config_value('paths', 'drafts_env_path')
-            conda_env_path = os.path.join(env_base_path, env_name, f"{env_name}_v{env_version}")
-        try:
-            if not os.path.exists(conda_env_path):
-                os.makedirs(conda_env_path, exist_ok=True)
-
-            py_requirements = ",".join(py_requirements)
-
-            script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
-
-            # Make the script executable
-            os.chmod(script_path, 0o755)
-
-            # Prepare command with arguments
-            command = ["bash", script_path, py_requirements, conda_env_path, py_version]
-
-            process = await asyncio.create_subprocess_exec(
-                *command,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE
-            )
-
-            return process
-        except OSError as e:
-            print(f"OS error while creating {conda_env_path}: {e}")
-        except Exception as e:
-            print(f"Unexpected error while creating {conda_env_path}: {e}")
-            return {"error": str(e)}
+            return None
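With this change `create_env` moves off the `Dataflow` facade into the new `EnvironmentManager` (next file). A hedged migration sketch; the import path follows the new module, the argument values are illustrative, and it assumes a host where the dataflow config file exists:

```python
# Hypothetical caller migration, not taken from the package's own code.
import asyncio
from dataflow.environment import EnvironmentManager

async def build_draft():
    manager = EnvironmentManager()  # reads /dataflow/app/config/dataflow.cfg
    # Without a db session no job log is written, and the draft branch
    # returns the raw subprocess handle rather than a "success"/"fail" string.
    process = await manager.create_env(
        env_name="demo",                       # illustrative
        py_version="3.10",                     # illustrative
        py_requirements=["pandas", "requests==2.31.0"],
        status="draft",
        base_env_id=None,
        env_version="1",
    )
    await process.wait()

asyncio.run(build_draft())
```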
dataflow_core-2.0.8/dataflow/environment.py

@@ -0,0 +1,459 @@
+import os, shutil, subprocess, datetime
+from .models.database import DatabaseManager
+from .models.environment import JobLogs, Environment
+import json, asyncio, pkg_resources
+from sqlalchemy.orm import Session
+from authenticator.package.configuration import ConfigurationManager
+
+class EnvironmentManager:
+    def __init__(self):
+        """Initialize the EnvironmentManager"""
+        self.config = ConfigurationManager('/dataflow/app/config/dataflow.cfg')
+        self.published_env_path = self.config.get_config_value('paths', 'published_env_path')
+        self.draft_env_path = self.config.get_config_value('paths', 'drafts_env_path')
+        self.env_logs_path = self.config.get_config_value('paths', 'env_logs_path')
+
+    async def create_env(self, env_name, py_version, py_requirements, status, base_env_id, env_version=None, user_name=None, db:Session=None):
+        """
+        Creates a conda environment with specified Python version and packages.
+
+        Args:
+            env_name (str): Name of the environment
+            py_version (str): Python version to use
+            py_requirements (list): List of packages to install
+            status (str): Environment status ('draft' or 'published')
+            env_version (str): Version of the environment (for draft environments)
+            user_name (str): Username who initiated the creation
+            db (Session): Database session (optional, will create if None)
+
+        Returns:
+            str: Build status ('success' or 'fail')
+        """
+        # Set up logging
+        log_file_location = None
+        if db:
+            log_file_location = self._setup_logging(env_name, env_version, user_name, db)
+
+        if status == "published":
+            return await self._execute_env_operation(
+                env_name=env_name,
+                py_version=py_version,
+                py_requirements=py_requirements,
+                status="published",
+                env_version=None,
+                mode="create"
+            )
+        elif status == "draft":
+            # Build the environment
+            build_status = await self._execute_env_operation(
+                env_name=env_name,
+                py_version=py_version,
+                py_requirements=py_requirements,
+                status=status,
+                env_version=env_version,
+                log_file_location=log_file_location,
+                mode="create"
+            )
+
+            # Update job log status if db was provided
+            if db and log_file_location:
+                log_file_name = os.path.basename(log_file_location)
+                await self._update_job_status(log_file_name, build_status, log_file_location, db)
+                updated_py_requirements = self.update_library_versions(py_requirements, os.path.join(self.draft_env_path, env_name, f"{env_name}_v{env_version}"))
+                self.update_environment_db(env_name, env_version, updated_py_requirements, base_env_id, py_version, db)
+
+            return build_status
+
+        else:
+            raise ValueError("Invalid status. Use 'draft' or 'published'.")
+
+    async def clone_env(self, source_path, target_env_name, libraries, py_version, user_name=None, db: Session=None):
+        """
+        Clones an existing conda environment.
+
+        Args:
+            source_path (str): Path to source environment
+            target_name (str): Name for the target environment
+            status (str): Environment status ('draft' or 'published')
+            env_version (str): Version of the environment (for draft environments)
+            user_name (str): Username who initiated the clone
+            db (Session): Database session (optional, will create if None)
+
+        Returns:
+            str: Build status ('success' or 'fail')
+        """
+        # Set up logging
+        log_file_location = None
+        if db:
+            log_file_location = self._setup_logging(target_env_name, "1", user_name, db)
+
+        # Perform the clone operation
+        clone_status = await self._execute_env_operation(
+            env_name=target_env_name,
+            source_path=source_path,
+            status="draft",
+            env_version="1",
+            log_file_location=log_file_location,
+            mode="clone"
+        )
+
+        # Update job log status if db was provided
+        if db and log_file_location:
+            log_file_name = os.path.basename(log_file_location)
+            await self._update_job_status(log_file_name, clone_status, log_file_location, db)
+            self.update_environment_db(env_short_name=target_env_name, version="1", libraries=libraries, base_env_id=None, py_version=py_version, db=db)
+
+        return clone_status
+
+    async def create_published_env(self, env_name, py_version, py_requirements):
+        """
+        Creates a published conda environment.
+
+        Args:
+            env_name (str): Name of the environment
+            py_version (str): Python version to use
+            py_requirements (list): List of packages to install
+            status (str): Environment status ('draft' or 'published')
+            env_version (str): Version of the environment (for draft environments)
+            user_name (str): Username who initiated the creation
+
+        Returns:
+            str: Build status ('success' or 'fail')
+        """
+        return self._execute_env_operation(
+            env_name=env_name,
+            py_version=py_version,
+            py_requirements=py_requirements,
+            status="published",
+            env_version=None,
+            mode="create"
+        )
+
+    async def _execute_env_operation(self, env_name: str, status: str, mode: str, env_version: str = None, py_version: str = None, py_requirements=None, source_path=None, log_file_location=None):
+        """
+        Executes environment operations (create or clone).
+
+        Args:
+            env_name (str): Name of the environment
+            status (str): Environment status ('draft' or 'published')
+            mode (str): Operation mode ('create' or 'clone')
+            env_version (str): Version of the environment (for draft environments)
+            py_version (str): Python version to use (for create mode)
+            py_requirements (list): List of packages to install (for create mode)
+            source_path (str): Path to source environment (for clone mode)
+            log_file_location (str): Path to log file
+
+        Returns:
+            str: Build status ('success' or 'fail')
+        """
+        status = status.lower()
+        if status == "published":
+            env_base_path = self.config.get_config_value('paths', 'published_env_path')
+            conda_env_path = os.path.join(env_base_path, env_name)
+        else:
+            env_base_path = self.config.get_config_value('paths', 'drafts_env_path')
+            conda_env_path = os.path.join(env_base_path, env_name, f"{env_name}_v{env_version}")
+
+        try:
+            if not os.path.exists(conda_env_path):
+                os.makedirs(conda_env_path, exist_ok=True)
+
+            if mode == "create":
+                # Convert requirements list to comma-separated string
+                if isinstance(py_requirements, list):
+                    py_requirements = ",".join(py_requirements)
+
+                create_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
+                command = ["bash", create_env_script_path, py_requirements, conda_env_path, py_version]
+
+            elif mode == "clone":
+                clone_env_script_path = pkg_resources.resource_filename('dataflow', 'scripts/clone_environment.sh')
+                command = ["bash", clone_env_script_path, source_path, conda_env_path]
+
+            else:
+                raise ValueError("Invalid mode. Use 'create' or 'clone'.")
+
+            process = await asyncio.create_subprocess_exec(
+                *command,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            if not log_file_location:
+                return process
+
+            with open(log_file_location, "a") as log_file:
+                success_detected = False
+                try:
+                    while True:
+                        line = await process.stdout.readline()
+                        if not line:
+                            break
+
+                        line = line.decode()
+                        message = {
+                            "timestamp": self.format_timestamp(),
+                            "type": "log",
+                            "content": line.strip()
+                        }
+                        log_file.write(json.dumps(message) + "\n")
+                        log_file.flush()
+
+                        if "environment creation successful" in line.lower():
+                            success_detected = True
+
+                    await process.wait()  # Ensure process is complete
+
+                    if process.returncode != 0:
+                        error_message = await process.stderr.read()
+                        error_message = error_message.decode().strip()
+                        error_message_dict = {
+                            "timestamp": self.format_timestamp(),
+                            "type": "error",
+                            "content": error_message
+                        }
+                        log_file.write(json.dumps(error_message_dict) + "\n")
+
+                    final_build_status = "fail" if process.returncode != 0 else "success"
+
+                except asyncio.CancelledError:
+                    process.kill()
+                    msg_content = "Environment operation cancelled due to request cancellation."
+                    cancellation_message = {
+                        "timestamp": self.format_timestamp(),
+                        "type": "error",
+                        "content": msg_content
+                    }
+                    log_file.write(json.dumps(cancellation_message) + "\n")
+                    final_build_status = "fail"
+
+                finally:
+                    if final_build_status == "success" and status == "draft":
+                        symlink_path = os.path.join(env_base_path, env_name, "default")
+                        self.update_symlink(symlink_path, conda_env_path)
+                    elif final_build_status != "success":
+                        if os.path.exists(conda_env_path):
+                            shutil.rmtree(conda_env_path)
+
+            return final_build_status
+
+        except OSError as e:
+            print(f"OS error while operating on {conda_env_path}: {e}")
+            return "fail"
+        except subprocess.CalledProcessError as e:
+            print(f"Subprocess error during environment operation: {e}")
+            return "fail"
+        except Exception as e:
+            print(f"Unexpected error during environment operation for {env_name}: {e}")
+            return "fail"
+
+    def _setup_logging(self, env_name: str, env_version: str, user_name: str, db: Session):
+        """
+        Sets up logging for environment operations.
+
+        Args:
+            env_name (str): Name of the environment
+            env_version (str): Version of the environment
+            user_name (str): Username who initiated the operation
+            db (Session): Database session
+
+        Returns:
+            str: Path to the log file
+        """
+        versioned_name = f"{env_name}_v{env_version}"
+        log_file_name = f"envlog_{versioned_name}.log"
+        log_file_dir = self.config.get_config_value('paths', 'env_logs_path')
+        os.makedirs(log_file_dir, exist_ok=True)
+        log_file_location = os.path.join(log_file_dir, log_file_name)
+
+        # Clear log file if it exists
+        if os.path.exists(log_file_location):
+            open(log_file_location, "w").close()
+
+        # Create job entry
+        self.create_job_entry(user_name, db, log_file_name, log_file_location)
+
+        return log_file_location
+
+    async def _update_job_status(self, log_file_name: str, build_status: str, log_file_location: str, db: Session):
+        """
+        Updates job status with retry logic.
+
+        Args:
+            db (Session): Database session
+            log_file_name (str): Name of the log file
+            build_status (str): Build status ('success' or 'fail')
+            log_file_location (str): Path to the log file
+        """
+        attempts = 3
+        retry_delay = 3
+
+        while attempts > 0:
+            try:
+                self.update_job_log(db, log_file_name, build_status)
+                break
+            except Exception as e:
+                attempts -= 1
+
+                with open(log_file_location, "a") as log_file:
+                    msg_content = "Failed to commit job completion time to database."
+                    error_message = {
+                        "timestamp": self.format_timestamp(),
+                        "type": "error",
+                        "content": msg_content
+                    }
+                    log_file.write(json.dumps(error_message) + "\n")
+
+                if attempts > 0:
+                    await asyncio.sleep(retry_delay)
+                else:
+                    print(f"Failed to update job log after multiple attempts: {e}")
+
+    def create_job_entry(self, user_name: str, db: Session, log_file_name: str, log_file_location: str):
+        """
+        Creates or updates a job entry for environment tracking.
+
+        Args:
+            user_name (str): The user who initiated the job
+            db (Session): Database session
+            log_file_name (str): Log file name
+            log_file_location (str): Log file path
+
+        Returns:
+            JobLogs: The created or updated job entry
+        """
+        job = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()
+
+        if job:
+            if job.status == "success":
+                raise ValueError(f"Job with log_file_name '{log_file_name}' already completed successfully.")
+            if job.status == "fail":
+                job.created_at = datetime.datetime.now()
+                job.status = "in_progress"
+        else:
+            job = JobLogs(
+                created_at=datetime.datetime.now(),
+                log_file_name=log_file_name,
+                log_file_location=log_file_location,
+                created_by=user_name,
+                status="in_progress"
+            )
+            db.add(job)
+
+        db.commit()
+        return job
+
+    def update_job_log(self, db, log_file_name, final_build_status):
+        """
+        Updates the JobLogs table with completion time and status.
+
+        Args:
+            db (Session): Database session
+            log_file_name (str): Name of the log file
+            final_build_status (str): Final status of the build ('success' or 'fail')
+        """
+        try:
+            job_record = db.query(JobLogs).filter(JobLogs.log_file_name == log_file_name).first()
+            if job_record:
+                job_record.completed_at = datetime.datetime.now()
+                job_record.status = final_build_status
+                db.commit()
+            else:
+                raise ValueError(f"No job log found for file: {log_file_name}")
+        except Exception as e:
+            db.rollback()
+            raise
+
+    def update_symlink(self, symlink_path, conda_env_path):
+        """
+        Creates or updates the symlink to point to the default version.
+        """
+        symlink_dir = os.path.dirname(symlink_path)
+        if not os.path.exists(symlink_dir):
+            os.makedirs(symlink_dir, exist_ok=True)
+
+        # If symlink exists, remove it before updating
+        if os.path.islink(symlink_path):
+            os.remove(symlink_path)
+
+        subprocess.run(["ln", "-sf", conda_env_path, symlink_path], check=True)
+
+    def format_timestamp(self):
+        """
+        Generates a formatted timestamp string representing the current date and time.
+
+        Returns:
+            str: A string representing the current date and time in the specified format.
+        """
+        return datetime.datetime.now().strftime("%b %d %I:%M:%S %p")
+
+    def update_environment_db(self, env_short_name, version, libraries, base_env_id, py_version, db: Session):
+        """
+        Updates the environment table with the new version and libraries.
+        """
+        try:
+            if isinstance(libraries, list):
+                libraries = ", ".join(libraries)
+            current_env = db.query(Environment).filter(Environment.short_name == env_short_name).first()
+            status = "Draft" if current_env and current_env.status == "Saved" else current_env.status
+            db.query(Environment).filter(Environment.short_name == env_short_name).update({"version": version, "py_requirements": libraries,"base_image_id": base_env_id,"py_version": py_version,"status": status})
+            db.commit()
+
+        except Exception as e:
+            db.rollback()
+            raise
+
+
+    def update_library_versions(self, libraries: list, conda_env_path: str) -> list:
+        """
+        Updates libraries without version specifications by getting their actual installed versions.
+
+        Args:
+            libraries (list): List of library requirements, some may not have version specs.
+            conda_env_path (str): Path to the conda environment where libraries are installed.
+
+        Returns:
+            list: Updated list of libraries with version specifications.
+        """
+        try:
+            pip_freeze_cmd = f"{conda_env_path}/bin/pip freeze"
+            result = subprocess.run(
+                pip_freeze_cmd,
+                shell=True,
+                capture_output=True,
+                text=True,
+                check=True
+            )
+
+            installed_versions = {}
+            for line in result.stdout.splitlines():
+                if "==" in line:
+                    lib_name, version = line.split("==", 1)
+                    installed_versions[lib_name.lower()] = version
+
+            # Update libraries without version specs
+            updated_libraries = []
+            for lib in libraries:
+                # Skip libraries that are python version specifications
+                if lib.lower().startswith("python=="):
+                    continue
+
+                if "==" not in lib:
+                    lib_name = lib.strip()
+                    lib_name_lower = lib_name.lower()
+
+                    if lib_name_lower in installed_versions:
+                        updated_libraries.append(f"{lib_name}=={installed_versions[lib_name_lower]}")
+                    else:
+                        updated_libraries.append(lib)
+                else:
+                    updated_libraries.append(lib)
+
+            return updated_libraries
+
+        except subprocess.CalledProcessError as e:
+            print(f"Error running pip freeze: {e.stderr}")
+            return libraries
+        except Exception as e:
+            print(f"Error updating library versions: {str(e)}")
+            return libraries
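`_execute_env_operation` streams the build output into the job log as one JSON object per line with `timestamp`, `type` (`"log"` or `"error"`), and `content` keys. A small sketch, under that assumption, of consuming such a log; the path is a stand-in:

```python
# Reads the JSON-lines build log written by _execute_env_operation.
import json

def read_env_log(path: str):
    with open(path) as fh:
        for raw in fh:
            entry = json.loads(raw)  # {"timestamp": ..., "type": "log" | "error", "content": ...}
            yield entry["type"], entry["content"]

for kind, content in read_env_log("/tmp/envlog_demo_v1.log"):  # hypothetical path
    print(f"[{kind}] {content}")
```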
dataflow_core-2.0.8/dataflow/models/environment.py

@@ -0,0 +1,64 @@
+from sqlalchemy import Column, Integer, String, Boolean, Text, ForeignKey, DateTime
+from sqlalchemy.orm import relationship, declarative_base
+from sqlalchemy.sql import func
+from datetime import datetime
+
+Base = declarative_base()
+
+class EnvironmentAttributes(Base):
+    """
+    Shared columns between Environment and ArchivedEnvironment.
+    """
+    __abstract__ = True
+
+    name = Column(String)
+    url = Column(String)
+    enabled = Column(Boolean, default=True)
+    version = Column(String, default=0)
+    is_latest = Column(Boolean, default=True)
+    base_image_id = Column(Integer, default=None)
+    short_name = Column(String(5))
+    status = Column(String, default="Saved")
+    icon = Column(String)
+    py_version = Column(String)
+    r_version = Column(String)
+    py_requirements = Column(Text)
+    r_requirements = Column(Text)
+    py_requirements_compiled = Column(Text)
+    r_requirements_compiled = Column(Text)
+    created_date = Column(DateTime, server_default=func.now())
+    created_by = Column(String)
+
+
+
+class Environment(EnvironmentAttributes):
+    __tablename__ = 'ENVIRONMENT'
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    short_name = Column(String(5), unique=True)
+
+    # Relationship with ArchivedEnvironment
+    archived_versions = relationship("ArchivedEnvironment", back_populates="original_environment")
+
+class ArchivedEnvironment(EnvironmentAttributes):
+    __tablename__ = 'ARCHIVED_ENVIRONMENT'
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    original_env_id = Column(Integer, ForeignKey('ENVIRONMENT.id'))
+    is_latest = Column(Boolean, default=False)
+
+    # Relationship with Environment
+    original_environment = relationship("Environment", back_populates="archived_versions")
+
+
+
+class JobLogs(Base):
+    __tablename__ = "JOB_LOGS"
+
+    id = Column(Integer, primary_key=True, index=True)
+    created_at = Column(DateTime, default=datetime.now)
+    completed_at = Column(DateTime, nullable=True)
+    log_file_name = Column(String, unique=True, nullable=False)
+    log_file_location = Column(String, nullable=False)
+    status = Column(String)
+    created_by = Column(String)
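The new `JobLogs` table keys builds by `log_file_name`, which `_setup_logging` derives as `envlog_<name>_v<version>.log`. A hedged sketch of checking a build's outcome (SQLAlchemy 1.4+ session usage; the engine URL and file name are stand-ins):

```python
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from dataflow.models.environment import JobLogs

engine = create_engine("sqlite:///jobs.db")  # hypothetical connection URL
SessionLocal = sessionmaker(bind=engine)

with SessionLocal() as db:
    job = db.query(JobLogs).filter(JobLogs.log_file_name == "envlog_demo_v1.log").first()
    if job:
        print(job.status, job.created_by, job.completed_at)
```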
dataflow_core-2.0.8/dataflow/scripts/clone_environment.sh

@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+source_env_name=$1
+target_env_path=$2
+
+# Use an isolated conda package cache to avoid concurrency issues
+export CONDA_PKGS_DIRS=$(mktemp -d)
+# to delete conda package cache after script finishes
+trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
+
+# 1. Cloning conda env
+conda create --clone ${source_env_name} --prefix ${target_env_path} --yes
+
+echo "Environment Creation Successful"
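The clone script takes the source environment and the target prefix positionally, matching the `command` that `_execute_env_operation` assembles for `mode="clone"`. A sketch of the same invocation from Python; both paths are illustrative:

```python
import subprocess, pkg_resources

script = pkg_resources.resource_filename('dataflow', 'scripts/clone_environment.sh')
subprocess.run(
    ["bash", script,
     "/envs/published/base",        # $1: source env name or prefix (hypothetical)
     "/envs/drafts/demo/demo_v1"],  # $2: target prefix (hypothetical)
    check=True,
)
```

The closing `echo "Environment Creation Successful"` is what the Python side matches (case-insensitively) to set `success_detected`.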
dataflow_core-2.0.8/dataflow/scripts/create_environment.sh

@@ -0,0 +1,17 @@
+#!/bin/bash
+set -e
+
+IFS=',' read -r -a libraries <<< $1
+conda_env_path=$2
+py_version=$3
+
+# Use an isolated conda package cache to avoid concurrency issues
+export CONDA_PKGS_DIRS=$(mktemp -d)
+# to delete conda package cache after script finishes
+trap 'rm -rf "$CONDA_PKGS_DIRS"' EXIT
+
+# 1. Creating conda environment
+conda create --prefix ${conda_env_path} --yes python=${py_version}
+
+# 2. Install user libraries
+${conda_env_path}/bin/pip install --root-user-action ignore ${libraries[@]}
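Here `$1` is a comma-separated package list that the script splits into an array, `$2` the conda prefix, and `$3` the Python version, mirroring the `command` built in `_execute_env_operation` for `mode="create"`. An illustrative invocation:

```python
import subprocess, pkg_resources

script = pkg_resources.resource_filename('dataflow', 'scripts/create_environment.sh')
subprocess.run(
    ["bash", script,
     "pandas,requests==2.31.0",    # $1: comma-separated libraries (illustrative)
     "/envs/drafts/demo/demo_v1",  # $2: conda prefix (illustrative)
     "3.10"],                      # $3: python version (illustrative)
    check=True,
)
```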
dataflow_core-2.0.8/dataflow_core.egg-info/PKG-INFO

@@ -0,0 +1,14 @@
+Metadata-Version: 2.4
+Name: dataflow-core
+Version: 2.0.8
+Summary: Dataflow core package
+Author: Dataflow
+Author-email:
+Requires-Dist: sqlalchemy
+Requires-Dist: boto3
+Requires-Dist: psycopg2-binary
+Requires-Dist: pymysql
+Requires-Dist: requests
+Dynamic: author
+Dynamic: requires-dist
+Dynamic: summary
{dataflow-core-2.0.6 → dataflow_core-2.0.8}/dataflow_core.egg-info/SOURCES.txt

@@ -13,10 +13,12 @@ authenticator/package/models/user.py
 dataflow/__init__.py
 dataflow/configuration.py
 dataflow/dataflow.py
+dataflow/environment.py
 dataflow/models/__init__.py
 dataflow/models/database.py
-dataflow/models/
-dataflow/
+dataflow/models/environment.py
+dataflow/scripts/clone_environment.sh
+dataflow/scripts/create_environment.sh
 dataflow/utils/__init__.py
 dataflow/utils/aws_secrets_manager.py
 dataflow_core.egg-info/PKG-INFO
{dataflow-core-2.0.6 → dataflow_core-2.0.8}/setup.py

@@ -1,8 +1,20 @@
 from setuptools import setup, find_packages
+from setuptools.command.install import install
+import os
+
+class PostInstall(install):
+    """Post-installation script to set executable permissions for scripts."""
+    def run(self):
+        install.run(self)
+        install_dir = os.path.join(self.install_lib, 'dataflow', 'scripts')
+        for filename in os.listdir(install_dir):
+            if filename.endswith('.sh'):
+                filepath = os.path.join(install_dir, filename)
+                os.chmod(filepath, 0o755)
 
 setup(
     name="dataflow-core",
-    version="2.0.
+    version="2.0.8",
     packages=find_packages(include=["dataflow", "dataflow.*", "authenticator", "authenticator.*"]),
     include_package_data=True,
     package_data={

@@ -23,4 +35,7 @@ setup(
         'dataflow_authenticator = authenticator.dataflowhubauthenticator:DataflowHubAuthenticator',
     ],
 },
-)
+    cmdclass={
+        'install': PostInstall,
+    },
+)
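One caveat worth flagging: a `cmdclass` install hook only executes when `setup.py install` actually runs, so installs that go through a prebuilt wheel may skip `PostInstall` and leave the `.sh` files without the execute bit. Since `_execute_env_operation` invokes them via `bash <script>` the bit is not strictly required, but a hedged manual equivalent for such cases:

```python
# Replicates PostInstall by hand when the install hook did not run (assumption:
# the package is importable and pkg_resources can resolve its data files).
import os
import pkg_resources

scripts_dir = pkg_resources.resource_filename('dataflow', 'scripts')
for name in os.listdir(scripts_dir):
    if name.endswith('.sh'):
        os.chmod(os.path.join(scripts_dir, name), 0o755)
```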
dataflow-core-2.0.6/PKG-INFO
DELETED

dataflow-core-2.0.6/dataflow/models/session.py
DELETED

@@ -1,17 +0,0 @@
-"""models.py"""
-from sqlalchemy import Column, Integer, String
-from sqlalchemy.ext.declarative import declarative_base
-
-#instance for create declarative base
-Base=declarative_base()
-
-class Session_table(Base):
-    """
-    Table SESSIONS
-    """
-
-    __tablename__='SESSION'
-
-    id = Column(Integer, primary_key=True, index=True, unique=True, nullable=False, autoincrement=True)
-    session_id = Column(String, unique=True, nullable=False)
-    user_id = Column(String, nullable=False)
dataflow-core-2.0.6/dataflow/models/user.py
DELETED

@@ -1,23 +0,0 @@
-"""models.py"""
-from sqlalchemy import Column, Integer, String, LargeBinary, Enum
-from sqlalchemy.ext.declarative import declarative_base
-
-#instance for create declarative base
-Base=declarative_base()
-
-class User(Base):
-    """
-    Table USER
-    """
-
-    __tablename__='USER'
-
-    user_id = Column(Integer, primary_key=True, index=True, autoincrement=True, nullable=False)
-    user_name = Column(String, unique=True, nullable=False)
-    first_name = Column(String)
-    last_name = Column(String)
-    email = Column(String, unique=True)
-    role = Column(Enum('admin', 'user', name='role_field'), nullable=False)
-    image = Column(LargeBinary)
-    active = Column(Enum('N', 'Y', name='active_field'), nullable=False, server_default=str("N"))
-    password = Column(String, nullable=False)