runnable 0.17.1__py3-none-any.whl → 0.19.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/file_system.py +253 -0
  5. extensions/catalog/pyproject.toml +14 -0
  6. extensions/job_executor/README.md +0 -0
  7. extensions/job_executor/__init__.py +160 -0
  8. extensions/job_executor/k8s.py +484 -0
  9. extensions/job_executor/k8s_job_spec.yaml +37 -0
  10. extensions/job_executor/local.py +61 -0
  11. extensions/job_executor/local_container.py +192 -0
  12. extensions/job_executor/pyproject.toml +16 -0
  13. extensions/nodes/README.md +0 -0
  14. extensions/nodes/nodes.py +954 -0
  15. extensions/nodes/pyproject.toml +15 -0
  16. extensions/pipeline_executor/README.md +0 -0
  17. extensions/pipeline_executor/__init__.py +644 -0
  18. extensions/pipeline_executor/argo.py +1307 -0
  19. extensions/pipeline_executor/argo_specification.yaml +51 -0
  20. extensions/pipeline_executor/local.py +62 -0
  21. extensions/pipeline_executor/local_container.py +362 -0
  22. extensions/pipeline_executor/mocked.py +161 -0
  23. extensions/pipeline_executor/pyproject.toml +16 -0
  24. extensions/pipeline_executor/retry.py +180 -0
  25. extensions/run_log_store/README.md +0 -0
  26. extensions/run_log_store/__init__.py +0 -0
  27. extensions/run_log_store/chunked_fs.py +113 -0
  28. extensions/run_log_store/db/implementation_FF.py +163 -0
  29. extensions/run_log_store/db/integration_FF.py +0 -0
  30. extensions/run_log_store/file_system.py +145 -0
  31. extensions/run_log_store/generic_chunked.py +599 -0
  32. extensions/run_log_store/pyproject.toml +15 -0
  33. extensions/secrets/README.md +0 -0
  34. extensions/secrets/dotenv.py +62 -0
  35. extensions/secrets/pyproject.toml +15 -0
  36. runnable/__init__.py +1 -0
  37. runnable/catalog.py +1 -2
  38. runnable/entrypoints.py +1 -5
  39. runnable/executor.py +1 -1
  40. runnable/parameters.py +0 -9
  41. runnable/utils.py +5 -25
  42. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/METADATA +1 -7
  43. runnable-0.19.0.dist-info/RECORD +58 -0
  44. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/entry_points.txt +1 -0
  45. runnable-0.17.1.dist-info/RECORD +0 -23
  46. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/WHEEL +0 -0
  47. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,484 @@
1
+ import logging
2
+ import shlex
3
+ from enum import Enum
4
+ from typing import Annotated, List, Optional
5
+
6
+ from kubernetes import client
7
+ from kubernetes import config as k8s_config
8
+ from pydantic import BaseModel, ConfigDict, Field, PlainSerializer, PrivateAttr
9
+ from pydantic.alias_generators import to_camel
10
+
11
+ from extensions.job_executor import GenericJobExecutor
12
+ from runnable import console, defaults, utils
13
+ from runnable.datastore import DataCatalog
14
+ from runnable.tasks import BaseTaskType
15
+
16
# NOTE(review): the sibling executor extensions/job_executor/local.py uses
# defaults.LOGGER_NAME here — confirm defaults.NAME is the intended logger
# name, or unify the two modules.
logger = logging.getLogger(defaults.NAME)
17
+
18
+
19
class Operator(str, Enum):
    """Label-selector match operators (k8s `LabelSelectorRequirement.operator`).

    NOTE(review): the k8s spec also defines "In"; it is absent here — confirm
    that is intentional.
    """

    NOT_IN = "NotIn"
    EXISTS = "Exists"
    DOES_NOT_EXIST = "DoesNotExist"
23
+
24
+
25
class RestartPolicy(str, Enum):
    """Pod restart policies (k8s `PodSpec.restartPolicy`)."""

    ALWAYS = "Always"
    ON_FAILURE = "OnFailure"
    NEVER = "Never"
29
+
30
+
31
class ImagePullPolicy(str, Enum):
    """Container image pull policies (k8s `Container.imagePullPolicy`)."""

    ALWAYS = "Always"
    IF_NOT_PRESENT = "IfNotPresent"
    NEVER = "Never"
35
+
36
+
37
class LabelSelectorRequirement(BaseModel):
    """A single expression of a k8s label selector: key, operator, values."""

    key: str
    operator: Operator
    values: list[str]
41
+
42
+
43
class LabelSelector(BaseModel):
    """k8s label selector: expression-based and exact-label matching."""

    match_expressions: list[LabelSelectorRequirement]
    match_labels: dict[str, str]
46
+
47
+
48
class ObjectMetaData(BaseModel):
    """Subset of k8s ObjectMeta used for the pod template.

    NOTE(review): `generate_name` and `annotations` carry no default, so
    pydantic v2 treats them as required despite the Optional annotation —
    confirm whether `= None` defaults were intended.
    """

    generate_name: Optional[str]
    annotations: Optional[dict[str, str]]
    namespace: Optional[str] = "default"
52
+
53
+
54
class EnvVar(BaseModel):
    """A name/value environment variable injected into the job container."""

    name: str
    value: str
57
+
58
+
59
class Request(BaseModel):
    """
    The default requests (k8s resource quantities are strings, e.g. "1Gi", "250m").
    """

    memory: str = "1Gi"
    cpu: str = "250m"
66
+
67
+
68
# Optional GPU count. Serialized to a string (k8s resource quantities are
# strings) and only emitted when a value is set ("unless-none").
VendorGPU = Annotated[
    Optional[int],
    PlainSerializer(lambda x: str(x), return_type=str, when_used="unless-none"),
]
72
+
73
+
74
class Limit(Request):
    """
    The default limits; extends Request with an optional GPU count that
    serializes under the "nvidia.com/gpu" resource key.
    """

    gpu: VendorGPU = Field(default=None, serialization_alias="nvidia.com/gpu")
80
+
81
+
82
class Resources(BaseModel):
    """Container resource requests and limits (k8s `ResourceRequirements`)."""

    limits: Limit = Limit()
    requests: Request = Request()
85
+
86
+
87
class VolumeMount(BaseModel):
    """Mounts the named volume at `mount_path` inside the container."""

    name: str
    mount_path: str
90
+
91
+
92
class Container(BaseModel):
    """The job's container spec; command and extra mounts are added at submit time."""

    image: str
    env: list[EnvVar] = Field(default_factory=list)
    image_pull_policy: ImagePullPolicy = ImagePullPolicy.NEVER
    resources: Resources = Resources()
    volume_mounts: Optional[list[VolumeMount]] = Field(default_factory=lambda: [])
98
+
99
+
100
class HostPath(BaseModel):
    """Host filesystem path backing a hostPath volume."""

    path: str
102
+
103
+
104
class HostPathVolume(BaseModel):
    """A named volume backed by a path on the node's host filesystem."""

    name: str
    host_path: HostPath
107
+
108
+
109
class PVCClaim(BaseModel):
    """Reference to an existing PersistentVolumeClaim.

    The camelCase alias generator makes this serialize as `claimName`,
    matching the k8s API field.
    """

    claim_name: str

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )
117
+
118
+
119
class PVCVolume(BaseModel):
    """A named volume backed by a PersistentVolumeClaim."""

    name: str
    persistent_volume_claim: PVCClaim
122
+
123
+
124
class K8sTemplateSpec(BaseModel):
    """Pod spec for the job's pod template (subset of k8s `PodSpec`)."""

    active_deadline_seconds: int = Field(default=60 * 60 * 2)  # 2 hours
    node_selector: Optional[dict[str, str]] = None
    tolerations: Optional[list[dict[str, str]]] = None
    volumes: Optional[list[HostPathVolume | PVCVolume]] = Field(
        default_factory=lambda: []
    )
    service_account_name: Optional[str] = "default"
    restart_policy: RestartPolicy = RestartPolicy.NEVER
    container: Container
134
+
135
+
136
class K8sTemplate(BaseModel):
    """Pod template: pod spec plus optional metadata."""

    spec: K8sTemplateSpec
    metadata: Optional[ObjectMetaData] = None
139
+
140
+
141
class Spec(BaseModel):
    """Top-level k8s JobSpec (subset of batch/v1 `V1JobSpec`)."""

    active_deadline_seconds: Optional[int] = Field(default=60 * 60 * 2)  # 2 hours
    backoff_limit: int = 6
    selector: Optional[LabelSelector] = None
    template: K8sTemplate
    ttl_seconds_after_finished: Optional[int] = Field(default=60 * 60 * 24)  # 24 hours
147
+
148
+
149
+ class GenericK8sJobExecutor(GenericJobExecutor):
150
+ service_name: str = "k8s-job"
151
+ config_path: Optional[str] = None
152
+ job_spec: Spec
153
+ mock: bool = False
154
+ namespace: str = Field(default="default")
155
+
156
+ _is_local: bool = PrivateAttr(default=False)
157
+ _volume_mounts: list[VolumeMount] = PrivateAttr(default_factory=lambda: [])
158
+ _volumes: list[HostPathVolume | PVCVolume] = PrivateAttr(default_factory=lambda: [])
159
+
160
+ _container_log_location: str = PrivateAttr(default="/tmp/run_logs/")
161
+ _container_catalog_location: str = PrivateAttr(default="/tmp/catalog/")
162
+ _container_secrets_location: str = PrivateAttr(default="/tmp/dotenv")
163
+
164
+ model_config = ConfigDict(
165
+ alias_generator=to_camel,
166
+ populate_by_name=True,
167
+ from_attributes=True,
168
+ )
169
+
170
+ def submit_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
171
+ """
172
+ This method gets invoked by the CLI.
173
+ """
174
+ self._set_up_run_log()
175
+
176
+ # Call the container job
177
+ job_log = self._context.run_log_store.create_job_log()
178
+ self._context.run_log_store.add_job_log(
179
+ run_id=self._context.run_id, job_log=job_log
180
+ )
181
+ # create volumes and volume mounts for the job
182
+ self._create_volumes()
183
+ self.submit_k8s_job(job)
184
+
185
+ def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
186
+ """
187
+ Focusses on execution of the job.
188
+ """
189
+ logger.info("Trying to execute job")
190
+ self._use_volumes()
191
+
192
+ job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)
193
+
194
+ attempt_log = job.execute_command(
195
+ attempt_number=self.step_attempt_number,
196
+ mock=self.mock,
197
+ )
198
+
199
+ job_log.status = attempt_log.status
200
+ job_log.attempts.append(attempt_log)
201
+
202
+ data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
203
+ catalog_settings=catalog_settings
204
+ )
205
+ logger.debug(f"data_catalogs_put: {data_catalogs_put}")
206
+
207
+ job_log.add_data_catalogs(data_catalogs_put or [])
208
+
209
+ console.print("Summary of job")
210
+ console.print(job_log.get_summary())
211
+
212
+ self._context.run_log_store.add_job_log(
213
+ run_id=self._context.run_id, job_log=job_log
214
+ )
215
+
216
+ @property
217
+ def _client(self):
218
+ if self.config_path:
219
+ k8s_config.load_kube_config(config_file=self.config_path)
220
+ else:
221
+ # https://github.com/kubernetes-client/python/blob/master/kubernetes/base/config/__init__.py
222
+ k8s_config.load_config()
223
+ return client
224
+
225
+ def submit_k8s_job(self, task: BaseTaskType):
226
+ if self.job_spec.template.spec.container.volume_mounts:
227
+ self._volume_mounts += self.job_spec.template.spec.container.volume_mounts
228
+
229
+ container_volume_mounts = [
230
+ self._client.V1VolumeMount(**vol.model_dump())
231
+ for vol in self._volume_mounts
232
+ ]
233
+
234
+ command = utils.get_job_execution_command()
235
+
236
+ container_env = [
237
+ self._client.V1EnvVar(**env.model_dump(by_alias=True))
238
+ for env in self.job_spec.template.spec.container.env
239
+ ]
240
+
241
+ base_container = self._client.V1Container(
242
+ command=shlex.split(command),
243
+ env=container_env,
244
+ name="default",
245
+ volume_mounts=container_volume_mounts,
246
+ **self.job_spec.template.spec.container.model_dump(
247
+ exclude_none=True, exclude={"volume_mounts", "command", "env"}
248
+ ),
249
+ )
250
+
251
+ if self.job_spec.template.spec.volumes:
252
+ self._volumes += self.job_spec.template.spec.volumes
253
+
254
+ spec_volumes = [
255
+ self._client.V1Volume(**vol.model_dump(by_alias=True))
256
+ for vol in self._volumes
257
+ ]
258
+
259
+ tolerations = None
260
+ if self.job_spec.template.spec.tolerations:
261
+ tolerations = [
262
+ self._client.V1Toleration(**toleration)
263
+ for toleration in self.job_spec.template.spec.tolerations
264
+ ]
265
+
266
+ pod_spec = self._client.V1PodSpec(
267
+ containers=[base_container],
268
+ # volumes=[vol.model_dump(by_alias=True) for vol in self._volumes],
269
+ volumes=spec_volumes,
270
+ tolerations=tolerations,
271
+ **self.job_spec.template.spec.model_dump(
272
+ exclude_none=True, exclude={"container", "volumes", "tolerations"}
273
+ ),
274
+ )
275
+
276
+ pod_template_metadata = None
277
+ if self.job_spec.template.metadata:
278
+ pod_template_metadata = self._client.V1ObjectMeta(
279
+ **self.job_spec.template.metadata.model_dump(exclude_none=True)
280
+ )
281
+
282
+ pod_template = self._client.V1PodTemplateSpec(
283
+ spec=pod_spec,
284
+ metadata=pod_template_metadata,
285
+ )
286
+
287
+ job_spec = client.V1JobSpec(
288
+ template=pod_template,
289
+ **self.job_spec.model_dump(exclude_none=True, exclude={"template"}),
290
+ )
291
+
292
+ job = client.V1Job(
293
+ api_version="batch/v1",
294
+ kind="Job",
295
+ metadata=client.V1ObjectMeta(name=self._context.run_id),
296
+ spec=job_spec,
297
+ )
298
+
299
+ logger.info(f"Submitting job: {job.__dict__}")
300
+ if self.mock:
301
+ print(job.__dict__)
302
+ return
303
+
304
+ try:
305
+ k8s_batch = self._client.BatchV1Api()
306
+ response = k8s_batch.create_namespaced_job(
307
+ body=job,
308
+ _preload_content=False,
309
+ pretty=True,
310
+ namespace=self.namespace,
311
+ )
312
+ logger.debug(f"Kubernetes job response: {response}")
313
+ except Exception as e:
314
+ logger.exception(e)
315
+ print(e)
316
+ raise
317
+
318
+ def _create_volumes(self): ...
319
+
320
+ def _use_volumes(self):
321
+ match self._context.run_log_store.service_name:
322
+ case "file-system":
323
+ self._context.run_log_store.log_folder = self._container_log_location
324
+ case "chunked-fs":
325
+ self._context.run_log_store.log_folder = self._container_log_location
326
+
327
+ match self._context.catalog_handler.service_name:
328
+ case "file-system":
329
+ self._context.catalog_handler.catalog_location = (
330
+ self._container_catalog_location
331
+ )
332
+
333
+
334
+ class MiniK8sJobExecutor(GenericK8sJobExecutor):
335
+ service_name: str = "k8s-job"
336
+ config_path: Optional[str] = None
337
+ job_spec: Spec
338
+ mock: bool = False
339
+
340
+ # The location the mount of .run_log_store is mounted to in minikube
341
+ # ensure that minikube mount $HOME/workspace/runnable/.run_log_store:/volume/run_logs is executed first
342
+ # $HOME/workspace/runnable/.catalog:/volume/catalog
343
+ # Ensure that the docker build is done with eval $(minikube docker-env)
344
+ mini_k8s_run_log_location: str = Field(default="/volume/run_logs/")
345
+ mini_k8s_catalog_location: str = Field(default="/volume/catalog/")
346
+
347
+ _is_local: bool = PrivateAttr(default=False)
348
+
349
+ model_config = ConfigDict(
350
+ alias_generator=to_camel,
351
+ populate_by_name=True,
352
+ from_attributes=True,
353
+ )
354
+
355
+ def _create_volumes(self):
356
+ match self._context.run_log_store.service_name:
357
+ case "file-system":
358
+ self._volumes.append(
359
+ # When you do: # minikube mount $HOME:/tmp/run_logs
360
+ # This .run_log_store is mounted to /tmp/run_logs of minikube
361
+ # You then are creating a volume that is mounted to /tmp/run_logs in the container
362
+ # You are then referring to it.
363
+ # https://stackoverflow.com/questions/57411456/minikube-mounted-host-folders-are-not-working
364
+ HostPathVolume(
365
+ name="run-logs",
366
+ host_path=HostPath(path=self.mini_k8s_run_log_location),
367
+ )
368
+ )
369
+ self._volume_mounts.append(
370
+ VolumeMount(
371
+ name="run-logs", mount_path=self._container_log_location
372
+ )
373
+ )
374
+ case "chunked-fs":
375
+ self._volumes.append(
376
+ HostPathVolume(
377
+ name="run-logs",
378
+ host_path=HostPath(path=self.mini_k8s_run_log_location),
379
+ )
380
+ )
381
+ self._volume_mounts.append(
382
+ VolumeMount(
383
+ name="run-logs", mount_path=self._container_log_location
384
+ )
385
+ )
386
+
387
+ match self._context.catalog_handler.service_name:
388
+ case "file-system":
389
+ self._volumes.append(
390
+ HostPathVolume(
391
+ name="catalog",
392
+ host_path=HostPath(path=self.mini_k8s_catalog_location),
393
+ )
394
+ )
395
+ self._volume_mounts.append(
396
+ VolumeMount(
397
+ name="catalog", mount_path=self._container_catalog_location
398
+ )
399
+ )
400
+
401
+
402
+ class K8sJobExecutor(GenericK8sJobExecutor):
403
+ service_name: str = "k8s-job"
404
+ config_path: Optional[str] = None
405
+ job_spec: Spec
406
+ mock: bool = False
407
+ pvc_claim_name: str
408
+
409
+ # change the spec to pull image if not present
410
+ def model_post_init(self, __context):
411
+ self.job_spec.template.spec.container.image_pull_policy = ImagePullPolicy.ALWAYS
412
+
413
+ _is_local: bool = PrivateAttr(default=False)
414
+
415
+ model_config = ConfigDict(
416
+ alias_generator=to_camel,
417
+ populate_by_name=True,
418
+ from_attributes=True,
419
+ )
420
+
421
+ def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
422
+ self._use_volumes()
423
+ self._set_up_run_log()
424
+
425
+ job_log = self._context.run_log_store.create_job_log()
426
+ self._context.run_log_store.add_job_log(
427
+ run_id=self._context.run_id, job_log=job_log
428
+ )
429
+
430
+ job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)
431
+
432
+ attempt_log = job.execute_command(
433
+ attempt_number=self.step_attempt_number,
434
+ mock=self.mock,
435
+ )
436
+
437
+ job_log.status = attempt_log.status
438
+ job_log.attempts.append(attempt_log)
439
+
440
+ data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
441
+ catalog_settings=catalog_settings
442
+ )
443
+ logger.debug(f"data_catalogs_put: {data_catalogs_put}")
444
+
445
+ job_log.add_data_catalogs(data_catalogs_put or [])
446
+
447
+ console.print("Summary of job")
448
+ console.print(job_log.get_summary())
449
+
450
+ self._context.run_log_store.add_job_log(
451
+ run_id=self._context.run_id, job_log=job_log
452
+ )
453
+
454
+ def _create_volumes(self):
455
+ self._volumes.append(
456
+ PVCVolume(
457
+ name=self.pvc_claim_name,
458
+ persistent_volume_claim=PVCClaim(claim_name=self.pvc_claim_name),
459
+ )
460
+ )
461
+ match self._context.run_log_store.service_name:
462
+ case "file-system":
463
+ self._volume_mounts.append(
464
+ VolumeMount(
465
+ name=self.pvc_claim_name,
466
+ mount_path=self._container_log_location,
467
+ )
468
+ )
469
+ case "chunked-fs":
470
+ self._volume_mounts.append(
471
+ VolumeMount(
472
+ name=self.pvc_claim_name,
473
+ mount_path=self._container_log_location,
474
+ )
475
+ )
476
+
477
+ match self._context.catalog_handler.service_name:
478
+ case "file-system":
479
+ self._volume_mounts.append(
480
+ VolumeMount(
481
+ name=self.pvc_claim_name,
482
+ mount_path=self._container_catalog_location,
483
+ )
484
+ )
@@ -0,0 +1,37 @@
1
+ # Follow this as a template: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1JobSpec.md
2
+
3
+ jobSpec:
4
+ activeDeadlineSeconds: Optional[int]
5
+ selector: Optional[LabelSelector]
6
+ ttlSecondsAfterFinished: Optional[int]
7
+ template:
8
+ metadata:
9
+ annotations: Optional[Dict[str, str]]
10
+ generate_name: Optional[str] = run_id
11
+ namespace: Optional[str] = "default"
12
+ spec:
13
+ activeDeadlineSeconds: Optional[int]
14
+ nodeSelector: Optional[Dict[str, str]]
15
+ tolerations: Optional[List[Toleration]]
16
+ volumes: Optional[List[str]]
17
+ serviceAccountName: Optional[str]
18
+ restartPolicy: Optional[str] = Choose from [Always, OnFailure, Never]
19
+ container:
20
+ command: List[str]
21
+ env:
22
+ - name: str
23
+ value: str
24
+ image: str
25
+ imagePullPolicy: Optional[str] = choose from [Always, Never, IfNotPresent]
26
+ resources:
27
+ limits:
28
+ cpu: str
29
+ memory: str
30
+ gpu: str
31
+ requests:
32
+ cpu: str
33
+ memory: str
34
+ gpu: str
35
+ volumeMounts:
36
+ - name: str
37
+ mountPath: str
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from typing import List, Optional
3
+
4
+ from extensions.job_executor import GenericJobExecutor
5
+ from runnable import console, defaults
6
+ from runnable.datastore import DataCatalog
7
+ from runnable.tasks import BaseTaskType
8
+
9
+ logger = logging.getLogger(defaults.LOGGER_NAME)
10
+
11
+
12
class LocalJobExecutor(GenericJobExecutor):
    """
    The LocalJobExecutor is a job executor that runs the job locally,
    in the current process.
    """

    service_name: str = "local"
    mock: bool = False

    def submit_job(
        self, job: BaseTaskType, catalog_settings: Optional[List[str]] = None
    ):
        """
        This method gets invoked by the CLI.

        Fix: ``catalog_settings`` previously *defaulted to* the typing object
        ``Optional[List[str]]`` — an annotation mistakenly written as a default
        value. It now defaults to ``None``.
        """
        self._set_up_run_log()

        job_log = self._context.run_log_store.create_job_log()
        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )

        self.execute_job(job, catalog_settings=catalog_settings)

    def execute_job(
        self, job: BaseTaskType, catalog_settings: Optional[List[str]] = None
    ):
        """
        Focusses on execution of the job: runs the command, records the
        attempt and synced catalog entries in the job log.
        """
        logger.info("Trying to execute job")

        job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)

        attempt_log = job.execute_command(
            attempt_number=self.step_attempt_number,
            mock=self.mock,
        )

        job_log.status = attempt_log.status
        job_log.attempts.append(attempt_log)

        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
            catalog_settings=catalog_settings
        )
        logger.debug(f"data_catalogs_put: {data_catalogs_put}")

        job_log.add_data_catalogs(data_catalogs_put or [])

        console.print("Summary of job")
        console.print(job_log.get_summary())

        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )