runnable 0.17.1__py3-none-any.whl → 0.18.0__py3-none-any.whl
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/file_system.py +253 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +160 -0
- extensions/job_executor/k8s.py +362 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +61 -0
- extensions/job_executor/local_container.py +192 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/nodes.py +954 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +644 -0
- extensions/pipeline_executor/argo.py +1307 -0
- extensions/pipeline_executor/argo_specification.yaml +51 -0
- extensions/pipeline_executor/local.py +62 -0
- extensions/pipeline_executor/local_container.py +363 -0
- extensions/pipeline_executor/mocked.py +161 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/pipeline_executor/retry.py +180 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/chunked_fs.py +113 -0
- extensions/run_log_store/db/implementation_FF.py +163 -0
- extensions/run_log_store/db/integration_FF.py +0 -0
- extensions/run_log_store/file_system.py +145 -0
- extensions/run_log_store/generic_chunked.py +599 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/METADATA +1 -7
- runnable-0.18.0.dist-info/RECORD +58 -0
- runnable-0.17.1.dist-info/RECORD +0 -23
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/WHEEL +0 -0
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/entry_points.txt +0 -0
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/licenses/LICENSE +0 -0
extensions/job_executor/k8s.py (new file)
@@ -0,0 +1,362 @@
import logging
import shlex
from enum import Enum
from typing import Annotated, List, Optional

from kubernetes import client
from kubernetes import config as k8s_config
from pydantic import BaseModel, ConfigDict, Field, PlainSerializer, PrivateAttr
from pydantic.alias_generators import to_camel

from extensions.job_executor import GenericJobExecutor
from runnable import console, defaults, utils
from runnable.datastore import DataCatalog
from runnable.tasks import BaseTaskType

logger = logging.getLogger(defaults.NAME)


class Operator(str, Enum):
    NOT_IN = "NotIn"
    EXISTS = "Exists"
    DOES_NOT_EXIST = "DoesNotExist"


class RestartPolicy(str, Enum):
    ALWAYS = "Always"
    ON_FAILURE = "OnFailure"
    NEVER = "Never"


class ImagePullPolicy(str, Enum):
    ALWAYS = "Always"
    IF_NOT_PRESENT = "IfNotPresent"
    NEVER = "Never"


class LabelSelectorRequirement(BaseModel):
    key: str
    operator: Operator
    values: list[str]


class LabelSelector(BaseModel):
    match_expressions: list[LabelSelectorRequirement]
    match_labels: dict[str, str]


class ObjectMetaData(BaseModel):
    generate_name: Optional[str]
    annotations: Optional[dict[str, str]]
    namespace: Optional[str] = "default"


class EnvVar(BaseModel):
    name: str
    value: str


class Request(BaseModel):
    """
    The default requests
    """

    memory: str = "1Gi"
    cpu: str = "250m"


VendorGPU = Annotated[
    Optional[int],
    PlainSerializer(lambda x: str(x), return_type=str, when_used="unless-none"),
]


class Limit(Request):
    """
    The default limits
    """

    gpu: VendorGPU = Field(default=None, serialization_alias="nvidia.com/gpu")


class Resources(BaseModel):
    limits: Limit = Limit()
    requests: Request = Request()


class VolumeMount(BaseModel):
    name: str
    mount_path: str


class Container(BaseModel):
    image: str
    env: list[EnvVar] = Field(default_factory=list)
    image_pull_policy: ImagePullPolicy = ImagePullPolicy.NEVER
    resources: Resources = Resources()
    volume_mounts: Optional[list[VolumeMount]] = Field(default_factory=lambda: [])


class HostPath(BaseModel):
    path: str


class Volume(BaseModel):
    name: str
    host_path: HostPath


class TemplateSpec(BaseModel):
    active_deadline_seconds: int = Field(default=60 * 60 * 2)  # 2 hours
    node_selector: Optional[dict[str, str]] = None
    tolerations: Optional[list[dict[str, str]]] = None
    volumes: Optional[list[Volume]] = Field(default_factory=lambda: [])
    service_account_name: Optional[str] = "default"
    restart_policy: RestartPolicy = RestartPolicy.NEVER
    container: Container


class Template(BaseModel):
    spec: TemplateSpec
    metadata: Optional[ObjectMetaData] = None


class Spec(BaseModel):
    active_deadline_seconds: Optional[int] = Field(default=60 * 60 * 2)  # 2 hours
    backoff_limit: int = 6
    selector: Optional[LabelSelector] = None
    template: Template
    ttl_seconds_after_finished: Optional[int] = Field(default=60 * 60 * 24)  # 24 hours


class K8sJobExecutor(GenericJobExecutor):
    service_name: str = "k8s-job"
    config_path: Optional[str] = None
    job_spec: Spec
    mock: bool = False

    # The location the mount of .run_log_store is mounted to in minikube
    # ensure that minikube mount $HOME/workspace/runnable/.run_log_store:/volume/run_logs is executed first
    # $HOME/workspace/runnable/.catalog:/volume/catalog
    # Ensure that the docker build is done with eval $(minikube docker-env)
    mini_k8s_run_log_location: str = Field(default="/volume/run_logs/")
    mini_k8s_catalog_location: str = Field(default="/volume/catalog/")

    _is_local: bool = PrivateAttr(default=False)

    _container_log_location: str = PrivateAttr(default="/tmp/run_logs/")
    _container_catalog_location: str = PrivateAttr(default="/tmp/catalog/")
    _container_secrets_location: str = PrivateAttr(default="/tmp/dotenv")

    _volumes: list[Volume] = []
    _volume_mounts: list[VolumeMount] = []

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )

    def submit_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
        """
        This method gets invoked by the CLI.
        """
        self._set_up_run_log()

        # Call the container job
        job_log = self._context.run_log_store.create_job_log()
        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )
        # create volumes and volume mounts for the job
        self._create_volumes()
        self.submit_k8s_job(job)

    def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
        """
        Focusses on execution of the job.
        """
        logger.info("Trying to execute job")
        self._use_volumes()

        job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)

        attempt_log = job.execute_command(
            attempt_number=self.step_attempt_number,
            mock=self.mock,
        )

        job_log.status = attempt_log.status
        job_log.attempts.append(attempt_log)

        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
            catalog_settings=catalog_settings
        )
        logger.debug(f"data_catalogs_put: {data_catalogs_put}")

        job_log.add_data_catalogs(data_catalogs_put or [])

        console.print("Summary of job")
        console.print(job_log.get_summary())

        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )

    @property
    def _client(self):
        if self.config_path:
            k8s_config.load_kube_config(config_file=self.config_path)
        else:
            # https://github.com/kubernetes-client/python/blob/master/kubernetes/base/config/__init__.py
            k8s_config.load_config()
        return client

    def submit_k8s_job(self, task: BaseTaskType):
        if self.job_spec.template.spec.container.volume_mounts:
            self._volume_mounts += self.job_spec.template.spec.container.volume_mounts

        container_volume_mounts = [
            self._client.V1VolumeMount(**vol.model_dump())
            for vol in self._volume_mounts
        ]

        command = utils.get_job_execution_command()

        container_env = [
            self._client.V1EnvVar(**env.model_dump(by_alias=True))
            for env in self.job_spec.template.spec.container.env
        ]

        base_container = self._client.V1Container(
            command=shlex.split(command),
            env=container_env,
            name="default",
            volume_mounts=container_volume_mounts,
            **self.job_spec.template.spec.container.model_dump(
                exclude_none=True, exclude={"volume_mounts", "command", "env"}
            ),
        )

        if self.job_spec.template.spec.volumes:
            self._volumes += self.job_spec.template.spec.volumes

        spec_volumes = [
            self._client.V1Volume(**vol.model_dump(by_alias=True))
            for vol in self._volumes
        ]

        tolerations = None
        if self.job_spec.template.spec.tolerations:
            tolerations = [
                self._client.V1Toleration(**toleration)
                for toleration in self.job_spec.template.spec.tolerations
            ]

        pod_spec = self._client.V1PodSpec(
            containers=[base_container],
            # volumes=[vol.model_dump(by_alias=True) for vol in self._volumes],
            volumes=spec_volumes,
            tolerations=tolerations,
            **self.job_spec.template.spec.model_dump(
                exclude_none=True, exclude={"container", "volumes", "tolerations"}
            ),
        )

        pod_template_metadata = None
        if self.job_spec.template.metadata:
            pod_template_metadata = self._client.V1ObjectMeta(
                **self.job_spec.template.metadata.model_dump(exclude_none=True)
            )

        pod_template = self._client.V1PodTemplateSpec(
            spec=pod_spec,
            metadata=pod_template_metadata,
        )

        job_spec = client.V1JobSpec(
            template=pod_template,
            **self.job_spec.model_dump(exclude_none=True, exclude={"template"}),
        )

        job = client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=client.V1ObjectMeta(name=self._context.run_id),
            spec=job_spec,
        )

        logger.info(f"Submitting job: {job.__dict__}")

        try:
            k8s_batch = self._client.BatchV1Api()
            response = k8s_batch.create_namespaced_job(
                body=job,
                namespace="default",
                _preload_content=False,
                pretty=True,
            )
            logger.debug(f"Kubernetes job response: {response}")
        except Exception as e:
            logger.exception(e)
            print(e)
            raise

    def _create_volumes(self):
        match self._context.run_log_store.service_name:
            case "file-system":
                self._volumes.append(
                    # When you do: # minikube mount $HOME:/tmp/run_logs
                    # This .run_log_store is mounted to /tmp/run_logs of minikube
                    # You then are creating a volume that is mounted to /tmp/run_logs in the container
                    # You are then referring to it.
                    # https://stackoverflow.com/questions/57411456/minikube-mounted-host-folders-are-not-working
                    Volume(
                        name="run-logs",
                        host_path=HostPath(path=self.mini_k8s_run_log_location),
                    )
                )
                self._volume_mounts.append(
                    VolumeMount(
                        name="run-logs", mount_path=self._container_log_location
                    )
                )
            case "chunked-fs":
                self._volumes.append(
                    Volume(
                        name="run-logs",
                        host_path=HostPath(path=self.mini_k8s_run_log_location),
                    )
                )
                self._volume_mounts.append(
                    VolumeMount(
                        name="run-logs", mount_path=self._container_log_location
                    )
                )

        match self._context.catalog_handler.service_name:
            case "file-system":
                self._volumes.append(
                    Volume(
                        name="catalog",
                        host_path=HostPath(path=self.mini_k8s_catalog_location),
                    )
                )
                self._volume_mounts.append(
                    VolumeMount(
                        name="catalog", mount_path=self._container_catalog_location
                    )
                )

    def _use_volumes(self):
        match self._context.run_log_store.service_name:
            case "file-system":
                self._context.run_log_store.log_folder = self._container_log_location
            case "chunked-fs":
                self._context.run_log_store.log_folder = self._container_log_location

        match self._context.catalog_handler.service_name:
            case "file-system":
                self._context.catalog_handler.catalog_location = (
                    self._container_catalog_location
                )
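For orientation, here is a minimal sketch of the job_spec that K8sJobExecutor consumes, built from the pydantic models added above. It assumes those models are importable from extensions.job_executor.k8s as shipped in this release; the image name and paths are illustrative only, not values from the package.

# Minimal sketch of building the spec K8sJobExecutor expects.
# The image name and volume paths below are illustrative.
from extensions.job_executor.k8s import (
    Container,
    HostPath,
    Spec,
    Template,
    TemplateSpec,
    Volume,
    VolumeMount,
)

job_spec = Spec(
    template=Template(
        spec=TemplateSpec(
            container=Container(
                image="my-job-image:latest",  # illustrative image name
                volume_mounts=[VolumeMount(name="scratch", mount_path="/tmp/scratch")],
            ),
            volumes=[Volume(name="scratch", host_path=HostPath(path="/volume/scratch"))],
        )
    )
)

# The models fill in the remaining defaults (backoff_limit=6, restart_policy=Never,
# 2 hour active deadline); exclude_none=True mirrors how submit_k8s_job dumps the spec.
print(job_spec.model_dump(exclude_none=True))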
extensions/job_executor/k8s_job_spec.yaml (new file)
@@ -0,0 +1,37 @@
#Follow this as a template https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1JobSpec.md

jobSpec:
  activeDeadlineSeconds: Optional[int]
  selector: Optional[LabelSelector]
  ttlSecondsAfterFinished: Optional[int]
  template:
    metadata:
      annotations: Optional[Dict[str, str]]
      generate_name: Optional[str] = run_id
      namespace: Optional[str] = "default"
    spec:
      activeDeadlineSeconds: Optional[int]
      nodeSelector: Optional[Dict[str, str]]
      tolerations: Optional[List[Toleration]]
      volumes: Optional[List[str]]
      serviceAccountName: Optional[str]
      restartPolicy: Optional[str] = Choose from [Always, OnFailure, Never]
      container:
        command: List[str]
        env:
          - name: str
            value: str
        image: str
        imagePullPolicy: Optional[str] = choose from [Always, Never, IfNotPresent]
        resources:
          limits:
            cpu: str
            memory: str
            gpu: str
          requests:
            cpu: str
            memory: str
            gpu: str
        volumeMounts:
          - name: str
            mountPath: str
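The file above documents key names and expected types rather than being a loadable configuration. As a rough illustration of what a filled-in spec could look like, the sketch below embeds one as a string and parses it with PyYAML; every value is invented for the example, only the key names come from the template.

# Illustrative, filled-in version of the template above, parsed to show its shape.
import yaml

example = """
jobSpec:
  activeDeadlineSeconds: 7200
  ttlSecondsAfterFinished: 86400
  template:
    spec:
      restartPolicy: Never
      serviceAccountName: default
      container:
        image: my-job-image:latest
        imagePullPolicy: IfNotPresent
        env:
          - name: LOG_LEVEL
            value: DEBUG
        resources:
          limits:
            cpu: "1"
            memory: 2Gi
          requests:
            cpu: 250m
            memory: 1Gi
"""

spec = yaml.safe_load(example)["jobSpec"]
print(spec["template"]["spec"]["container"]["image"])  # my-job-image:latest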
extensions/job_executor/local.py (new file)
@@ -0,0 +1,61 @@
import logging
from typing import List, Optional

from extensions.job_executor import GenericJobExecutor
from runnable import console, defaults
from runnable.datastore import DataCatalog
from runnable.tasks import BaseTaskType

logger = logging.getLogger(defaults.LOGGER_NAME)


class LocalJobExecutor(GenericJobExecutor):
    """
    The LocalJobExecutor is a job executor that runs the job locally.
    """

    service_name: str = "local"
    mock: bool = False

    def submit_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
        """
        This method gets invoked by the CLI.
        """
        self._set_up_run_log()

        job_log = self._context.run_log_store.create_job_log()
        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )

        self.execute_job(job, catalog_settings=catalog_settings)

    def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
        """
        Focusses on execution of the job.
        """
        logger.info("Trying to execute job")

        job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)

        attempt_log = job.execute_command(
            attempt_number=self.step_attempt_number,
            mock=self.mock,
        )

        job_log.status = attempt_log.status
        job_log.attempts.append(attempt_log)

        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
            catalog_settings=catalog_settings
        )
        logger.debug(f"data_catalogs_put: {data_catalogs_put}")

        job_log.add_data_catalogs(data_catalogs_put or [])

        console.print("Summary of job")
        console.print(job_log.get_summary())

        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )
extensions/job_executor/local_container.py (new file)
@@ -0,0 +1,192 @@
import logging
from pathlib import Path
from typing import Dict, List, Optional

from pydantic import Field

from extensions.job_executor import GenericJobExecutor
from runnable import console, defaults, utils
from runnable.datastore import DataCatalog
from runnable.tasks import BaseTaskType

logger = logging.getLogger(defaults.LOGGER_NAME)


class LocalContainerJobExecutor(GenericJobExecutor):
    """
    The LocalJobExecutor is a job executor that runs the job locally.
    """

    service_name: str = "local-container"
    docker_image: str
    mock: bool = False
    auto_remove_container: bool = True
    environment: Dict[str, str] = Field(default_factory=dict)

    _is_local: bool = False

    _container_log_location = "/tmp/run_logs/"
    _container_catalog_location = "/tmp/catalog/"
    _container_secrets_location = "/tmp/dotenv"
    _volumes: Dict[str, Dict[str, str]] = {}

    def submit_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
        """
        This method gets invoked by the CLI.
        """
        self._set_up_run_log()
        self._mount_volumes()

        # Call the container job
        job_log = self._context.run_log_store.create_job_log()
        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )
        self.spin_container()

    def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
        """
        Focusses on execution of the job.
        """
        self._use_volumes()
        logger.info("Trying to execute job")

        job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)

        attempt_log = job.execute_command(
            attempt_number=self.step_attempt_number,
            mock=self.mock,
        )

        job_log.status = attempt_log.status
        job_log.attempts.append(attempt_log)

        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
            catalog_settings=catalog_settings
        )
        logger.debug(f"data_catalogs_put: {data_catalogs_put}")

        job_log.add_data_catalogs(data_catalogs_put or [])

        console.print("Summary of job")
        console.print(job_log.get_summary())

        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )

    def spin_container(self):
        """
        This method spins up the container
        """
        import docker  # pylint: disable=C0415

        try:
            client = docker.from_env()
            api_client = docker.APIClient()
        except Exception as ex:
            logger.exception("Could not get access to docker")
            raise Exception(
                "Could not get the docker socket file, do you have docker installed?"
            ) from ex

        try:
            command = utils.get_job_execution_command()
            logger.info(f"Running the command {command}")
            print(command)

            docker_image = self.docker_image
            environment = self.environment

            container = client.containers.create(
                image=docker_image,
                command=command,
                auto_remove=False,
                volumes=self._volumes,
                network_mode="host",
                environment=environment,
            )

            # print(container.__dict__)

            container.start()
            stream = api_client.logs(
                container=container.id, timestamps=True, stream=True, follow=True
            )
            while True:
                try:
                    output = next(stream).decode("utf-8")
                    output = output.strip("\r\n")
                    logger.info(output)
                    print(output)
                except StopIteration:
                    logger.info("Docker Run completed")
                    break

            exit_status = api_client.inspect_container(container.id)["State"][
                "ExitCode"
            ]

            if self.auto_remove_container:
                container.remove(force=True)

            if exit_status != 0:
                msg = f"Docker command failed with exit code {exit_status}"
                raise Exception(msg)

        except Exception as _e:
            logger.exception("Problems with spinning/running the container")
            raise _e

    def _mount_volumes(self):
        """
        Mount the volumes for the container
        """
        match self._context.run_log_store.service_name:
            case "file-system":
                write_to = self._context.run_log_store.log_folder
                self._volumes[str(Path(write_to).resolve())] = {
                    "bind": f"{self._container_log_location}",
                    "mode": "rw",
                }
            case "chunked-fs":
                write_to = self._context.run_log_store.log_folder
                self._volumes[str(Path(write_to).resolve())] = {
                    "bind": f"{self._container_log_location}",
                    "mode": "rw",
                }

        match self._context.catalog_handler.service_name:
            case "file-system":
                catalog_location = self._context.catalog_handler.catalog_location
                self._volumes[str(Path(catalog_location).resolve())] = {
                    "bind": f"{self._container_catalog_location}",
                    "mode": "rw",
                }

        match self._context.secrets_handler.service_name:
            case "dotenv":
                secrets_location = self._context.secrets_handler.location
                self._volumes[str(Path(secrets_location).resolve())] = {
                    "bind": f"{self._container_secrets_location}",
                    "mode": "ro",
                }

    def _use_volumes(self):
        match self._context.run_log_store.service_name:
            case "file-system":
                self._context.run_log_store.log_folder = self._container_log_location
            case "chunked-fs":
                self._context.run_log_store.log_folder = self._container_log_location

        match self._context.catalog_handler.service_name:
            case "file-system":
                self._context.catalog_handler.catalog_location = (
                    self._container_catalog_location
                )

        match self._context.secrets_handler.service_name:
            case "dotenv":
                self._context.secrets_handler.location = (
                    self._container_secrets_location
                )
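The _volumes mapping built by _mount_volumes above follows the docker SDK's bind-mount convention of {host_path: {"bind": container_path, "mode": "rw" or "ro"}}. Below is a standalone sketch of that same pattern with docker-py, assuming a local Docker daemon; the image name and host paths are illustrative and not taken from the package.

# Standalone sketch of the bind-mount pattern used by LocalContainerJobExecutor.
from pathlib import Path

import docker

volumes = {
    str(Path(".run_log_store").resolve()): {"bind": "/tmp/run_logs/", "mode": "rw"},
    str(Path(".catalog").resolve()): {"bind": "/tmp/catalog/", "mode": "rw"},
}

client = docker.from_env()
container = client.containers.create(
    image="python:3.10-slim",  # illustrative image
    command='python -c "print(\'hello from the job container\')"',
    volumes=volumes,
    network_mode="host",
)
container.start()
print(container.wait())  # e.g. {'Error': None, 'StatusCode': 0}
container.remove(force=True)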
extensions/job_executor/pyproject.toml (new file)
@@ -0,0 +1,16 @@
[project]
name = "job_executor"
version = "0.0.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []


[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


[tool.hatch.build.targets.wheel]
packages = ["."]