runnable 0.17.1__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/file_system.py +253 -0
  5. extensions/catalog/pyproject.toml +14 -0
  6. extensions/job_executor/README.md +0 -0
  7. extensions/job_executor/__init__.py +160 -0
  8. extensions/job_executor/k8s.py +484 -0
  9. extensions/job_executor/k8s_job_spec.yaml +37 -0
  10. extensions/job_executor/local.py +61 -0
  11. extensions/job_executor/local_container.py +192 -0
  12. extensions/job_executor/pyproject.toml +16 -0
  13. extensions/nodes/README.md +0 -0
  14. extensions/nodes/nodes.py +954 -0
  15. extensions/nodes/pyproject.toml +15 -0
  16. extensions/pipeline_executor/README.md +0 -0
  17. extensions/pipeline_executor/__init__.py +644 -0
  18. extensions/pipeline_executor/argo.py +1307 -0
  19. extensions/pipeline_executor/argo_specification.yaml +51 -0
  20. extensions/pipeline_executor/local.py +62 -0
  21. extensions/pipeline_executor/local_container.py +362 -0
  22. extensions/pipeline_executor/mocked.py +161 -0
  23. extensions/pipeline_executor/pyproject.toml +16 -0
  24. extensions/pipeline_executor/retry.py +180 -0
  25. extensions/run_log_store/README.md +0 -0
  26. extensions/run_log_store/__init__.py +0 -0
  27. extensions/run_log_store/chunked_fs.py +113 -0
  28. extensions/run_log_store/db/implementation_FF.py +163 -0
  29. extensions/run_log_store/db/integration_FF.py +0 -0
  30. extensions/run_log_store/file_system.py +145 -0
  31. extensions/run_log_store/generic_chunked.py +599 -0
  32. extensions/run_log_store/pyproject.toml +15 -0
  33. extensions/secrets/README.md +0 -0
  34. extensions/secrets/dotenv.py +62 -0
  35. extensions/secrets/pyproject.toml +15 -0
  36. runnable/__init__.py +1 -0
  37. runnable/catalog.py +1 -2
  38. runnable/entrypoints.py +1 -5
  39. runnable/executor.py +1 -1
  40. runnable/parameters.py +0 -9
  41. runnable/utils.py +5 -25
  42. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/METADATA +1 -7
  43. runnable-0.19.0.dist-info/RECORD +58 -0
  44. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/entry_points.txt +1 -0
  45. runnable-0.17.1.dist-info/RECORD +0 -23
  46. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/WHEEL +0 -0
  47. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,484 @@
1
+ import logging
2
+ import shlex
3
+ from enum import Enum
4
+ from typing import Annotated, List, Optional
5
+
6
+ from kubernetes import client
7
+ from kubernetes import config as k8s_config
8
+ from pydantic import BaseModel, ConfigDict, Field, PlainSerializer, PrivateAttr
9
+ from pydantic.alias_generators import to_camel
10
+
11
+ from extensions.job_executor import GenericJobExecutor
12
+ from runnable import console, defaults, utils
13
+ from runnable.datastore import DataCatalog
14
+ from runnable.tasks import BaseTaskType
15
+
16
# Module-level logger. Uses defaults.LOGGER_NAME for consistency with the
# sibling executor module extensions/job_executor/local.py, which logs under
# the same name so records share one logging configuration.
logger = logging.getLogger(defaults.LOGGER_NAME)
17
+
18
+
19
class Operator(str, Enum):
    """Label-selector operators accepted in a Kubernetes match expression."""

    NOT_IN = "NotIn"
    EXISTS = "Exists"
    DOES_NOT_EXIST = "DoesNotExist"
23
+
24
+
25
class RestartPolicy(str, Enum):
    """Pod restart policies understood by the Kubernetes job spec."""

    ALWAYS = "Always"
    ON_FAILURE = "OnFailure"
    NEVER = "Never"
29
+
30
+
31
class ImagePullPolicy(str, Enum):
    """Container image pull policies understood by Kubernetes."""

    ALWAYS = "Always"
    IF_NOT_PRESENT = "IfNotPresent"
    NEVER = "Never"
35
+
36
+
37
class LabelSelectorRequirement(BaseModel):
    """One match expression of a Kubernetes label selector."""

    key: str
    operator: Operator
    values: list[str]
41
+
42
+
43
class LabelSelector(BaseModel):
    """Kubernetes label selector: expression-based and exact-label matching."""

    match_expressions: list[LabelSelectorRequirement]
    match_labels: dict[str, str]
46
+
47
+
48
class ObjectMetaData(BaseModel):
    """Subset of Kubernetes ObjectMeta used for the pod template.

    NOTE(review): generate_name and annotations are Optional but have no
    default, so pydantic v2 still treats them as *required* fields — callers
    must pass None explicitly. Confirm whether `= None` defaults were intended.
    """

    generate_name: Optional[str]
    annotations: Optional[dict[str, str]]
    namespace: Optional[str] = "default"
52
+
53
+
54
class EnvVar(BaseModel):
    """A single name/value environment variable for the job container."""

    name: str
    value: str
57
+
58
+
59
class Request(BaseModel):
    """
    The default resource requests for the job container.
    """

    memory: str = "1Gi"
    cpu: str = "250m"
66
+
67
+
68
# Optional GPU count, serialized to a string because Kubernetes resource
# quantities are strings; omitted from the dump entirely when None
# (when_used="unless-none").
VendorGPU = Annotated[
    Optional[int],
    PlainSerializer(lambda x: str(x), return_type=str, when_used="unless-none"),
]
72
+
73
+
74
class Limit(Request):
    """
    The default resource limits; extends Request with an optional GPU count
    serialized under the "nvidia.com/gpu" resource key.
    """

    gpu: VendorGPU = Field(default=None, serialization_alias="nvidia.com/gpu")
80
+
81
+
82
class Resources(BaseModel):
    """Container resource section: limits and requests with sane defaults."""

    limits: Limit = Limit()
    requests: Request = Request()
85
+
86
+
87
class VolumeMount(BaseModel):
    """A named volume mounted at mount_path inside the container."""

    name: str
    mount_path: str
90
+
91
+
92
class Container(BaseModel):
    """The job's container: image, env vars, pull policy, resources, mounts."""

    image: str
    env: list[EnvVar] = Field(default_factory=list)
    image_pull_policy: ImagePullPolicy = ImagePullPolicy.NEVER
    resources: Resources = Resources()
    volume_mounts: Optional[list[VolumeMount]] = Field(default_factory=lambda: [])
98
+
99
+
100
class HostPath(BaseModel):
    """A hostPath volume source: a path on the node's filesystem."""

    path: str
102
+
103
+
104
class HostPathVolume(BaseModel):
    """A named volume backed by a hostPath source."""

    name: str
    host_path: HostPath
107
+
108
+
109
class PVCClaim(BaseModel):
    """Reference to a PersistentVolumeClaim by name.

    model_config enables camelCase aliases so model_dump(by_alias=True)
    produces the key shape ("claimName") the Kubernetes client expects.
    """

    claim_name: str

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )
117
+
118
+
119
class PVCVolume(BaseModel):
    """A named volume backed by a PersistentVolumeClaim."""

    name: str
    persistent_volume_claim: PVCClaim
122
+
123
+
124
class K8sTemplateSpec(BaseModel):
    """Pod-level spec of the job template (PodSpec subset)."""

    # Maximum pod runtime before Kubernetes kills it.
    active_deadline_seconds: int = Field(default=60 * 60 * 2)  # 2 hours
    node_selector: Optional[dict[str, str]] = None
    tolerations: Optional[list[dict[str, str]]] = None
    volumes: Optional[list[HostPathVolume | PVCVolume]] = Field(
        default_factory=lambda: []
    )
    service_account_name: Optional[str] = "default"
    restart_policy: RestartPolicy = RestartPolicy.NEVER
    container: Container
134
+
135
+
136
class K8sTemplate(BaseModel):
    """Pod template: spec plus optional metadata."""

    spec: K8sTemplateSpec
    metadata: Optional[ObjectMetaData] = None
139
+
140
+
141
class Spec(BaseModel):
    """Job-level spec (V1JobSpec subset) wrapping the pod template."""

    active_deadline_seconds: Optional[int] = Field(default=60 * 60 * 2)  # 2 hours
    backoff_limit: int = 6
    selector: Optional[LabelSelector] = None
    template: K8sTemplate
    ttl_seconds_after_finished: Optional[int] = Field(default=60 * 60 * 24)  # 24 hours
147
+
148
+
149
+ class GenericK8sJobExecutor(GenericJobExecutor):
150
+ service_name: str = "k8s-job"
151
+ config_path: Optional[str] = None
152
+ job_spec: Spec
153
+ mock: bool = False
154
+ namespace: str = Field(default="default")
155
+
156
+ _is_local: bool = PrivateAttr(default=False)
157
+ _volume_mounts: list[VolumeMount] = PrivateAttr(default_factory=lambda: [])
158
+ _volumes: list[HostPathVolume | PVCVolume] = PrivateAttr(default_factory=lambda: [])
159
+
160
+ _container_log_location: str = PrivateAttr(default="/tmp/run_logs/")
161
+ _container_catalog_location: str = PrivateAttr(default="/tmp/catalog/")
162
+ _container_secrets_location: str = PrivateAttr(default="/tmp/dotenv")
163
+
164
+ model_config = ConfigDict(
165
+ alias_generator=to_camel,
166
+ populate_by_name=True,
167
+ from_attributes=True,
168
+ )
169
+
170
+ def submit_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
171
+ """
172
+ This method gets invoked by the CLI.
173
+ """
174
+ self._set_up_run_log()
175
+
176
+ # Call the container job
177
+ job_log = self._context.run_log_store.create_job_log()
178
+ self._context.run_log_store.add_job_log(
179
+ run_id=self._context.run_id, job_log=job_log
180
+ )
181
+ # create volumes and volume mounts for the job
182
+ self._create_volumes()
183
+ self.submit_k8s_job(job)
184
+
185
+ def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
186
+ """
187
+ Focusses on execution of the job.
188
+ """
189
+ logger.info("Trying to execute job")
190
+ self._use_volumes()
191
+
192
+ job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)
193
+
194
+ attempt_log = job.execute_command(
195
+ attempt_number=self.step_attempt_number,
196
+ mock=self.mock,
197
+ )
198
+
199
+ job_log.status = attempt_log.status
200
+ job_log.attempts.append(attempt_log)
201
+
202
+ data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
203
+ catalog_settings=catalog_settings
204
+ )
205
+ logger.debug(f"data_catalogs_put: {data_catalogs_put}")
206
+
207
+ job_log.add_data_catalogs(data_catalogs_put or [])
208
+
209
+ console.print("Summary of job")
210
+ console.print(job_log.get_summary())
211
+
212
+ self._context.run_log_store.add_job_log(
213
+ run_id=self._context.run_id, job_log=job_log
214
+ )
215
+
216
+ @property
217
+ def _client(self):
218
+ if self.config_path:
219
+ k8s_config.load_kube_config(config_file=self.config_path)
220
+ else:
221
+ # https://github.com/kubernetes-client/python/blob/master/kubernetes/base/config/__init__.py
222
+ k8s_config.load_config()
223
+ return client
224
+
225
+ def submit_k8s_job(self, task: BaseTaskType):
226
+ if self.job_spec.template.spec.container.volume_mounts:
227
+ self._volume_mounts += self.job_spec.template.spec.container.volume_mounts
228
+
229
+ container_volume_mounts = [
230
+ self._client.V1VolumeMount(**vol.model_dump())
231
+ for vol in self._volume_mounts
232
+ ]
233
+
234
+ command = utils.get_job_execution_command()
235
+
236
+ container_env = [
237
+ self._client.V1EnvVar(**env.model_dump(by_alias=True))
238
+ for env in self.job_spec.template.spec.container.env
239
+ ]
240
+
241
+ base_container = self._client.V1Container(
242
+ command=shlex.split(command),
243
+ env=container_env,
244
+ name="default",
245
+ volume_mounts=container_volume_mounts,
246
+ **self.job_spec.template.spec.container.model_dump(
247
+ exclude_none=True, exclude={"volume_mounts", "command", "env"}
248
+ ),
249
+ )
250
+
251
+ if self.job_spec.template.spec.volumes:
252
+ self._volumes += self.job_spec.template.spec.volumes
253
+
254
+ spec_volumes = [
255
+ self._client.V1Volume(**vol.model_dump(by_alias=True))
256
+ for vol in self._volumes
257
+ ]
258
+
259
+ tolerations = None
260
+ if self.job_spec.template.spec.tolerations:
261
+ tolerations = [
262
+ self._client.V1Toleration(**toleration)
263
+ for toleration in self.job_spec.template.spec.tolerations
264
+ ]
265
+
266
+ pod_spec = self._client.V1PodSpec(
267
+ containers=[base_container],
268
+ # volumes=[vol.model_dump(by_alias=True) for vol in self._volumes],
269
+ volumes=spec_volumes,
270
+ tolerations=tolerations,
271
+ **self.job_spec.template.spec.model_dump(
272
+ exclude_none=True, exclude={"container", "volumes", "tolerations"}
273
+ ),
274
+ )
275
+
276
+ pod_template_metadata = None
277
+ if self.job_spec.template.metadata:
278
+ pod_template_metadata = self._client.V1ObjectMeta(
279
+ **self.job_spec.template.metadata.model_dump(exclude_none=True)
280
+ )
281
+
282
+ pod_template = self._client.V1PodTemplateSpec(
283
+ spec=pod_spec,
284
+ metadata=pod_template_metadata,
285
+ )
286
+
287
+ job_spec = client.V1JobSpec(
288
+ template=pod_template,
289
+ **self.job_spec.model_dump(exclude_none=True, exclude={"template"}),
290
+ )
291
+
292
+ job = client.V1Job(
293
+ api_version="batch/v1",
294
+ kind="Job",
295
+ metadata=client.V1ObjectMeta(name=self._context.run_id),
296
+ spec=job_spec,
297
+ )
298
+
299
+ logger.info(f"Submitting job: {job.__dict__}")
300
+ if self.mock:
301
+ print(job.__dict__)
302
+ return
303
+
304
+ try:
305
+ k8s_batch = self._client.BatchV1Api()
306
+ response = k8s_batch.create_namespaced_job(
307
+ body=job,
308
+ _preload_content=False,
309
+ pretty=True,
310
+ namespace=self.namespace,
311
+ )
312
+ logger.debug(f"Kubernetes job response: {response}")
313
+ except Exception as e:
314
+ logger.exception(e)
315
+ print(e)
316
+ raise
317
+
318
+ def _create_volumes(self): ...
319
+
320
+ def _use_volumes(self):
321
+ match self._context.run_log_store.service_name:
322
+ case "file-system":
323
+ self._context.run_log_store.log_folder = self._container_log_location
324
+ case "chunked-fs":
325
+ self._context.run_log_store.log_folder = self._container_log_location
326
+
327
+ match self._context.catalog_handler.service_name:
328
+ case "file-system":
329
+ self._context.catalog_handler.catalog_location = (
330
+ self._container_catalog_location
331
+ )
332
+
333
+
334
class MiniK8sJobExecutor(GenericK8sJobExecutor):
    """K8s job executor variant targeting a local minikube cluster.

    Relies on pre-established minikube host mounts:
      minikube mount $HOME/workspace/runnable/.run_log_store:/volume/run_logs
      minikube mount $HOME/workspace/runnable/.catalog:/volume/catalog
    and images built with ``eval $(minikube docker-env)``.
    """

    service_name: str = "k8s-job"
    config_path: Optional[str] = None
    job_spec: Spec
    mock: bool = False

    # Paths inside minikube where the host folders above are mounted.
    mini_k8s_run_log_location: str = Field(default="/volume/run_logs/")
    mini_k8s_catalog_location: str = Field(default="/volume/catalog/")

    _is_local: bool = PrivateAttr(default=False)

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )

    def _create_volumes(self):
        """Register hostPath volumes/mounts for the run log store and catalog."""
        log_store_kind = self._context.run_log_store.service_name

        # Both file-backed run log stores use the same minikube mount.
        # The host folder is visible inside minikube via `minikube mount`;
        # the hostPath volume then exposes it to the job container.
        # https://stackoverflow.com/questions/57411456/minikube-mounted-host-folders-are-not-working
        if log_store_kind in ("file-system", "chunked-fs"):
            self._volumes.append(
                HostPathVolume(
                    name="run-logs",
                    host_path=HostPath(path=self.mini_k8s_run_log_location),
                )
            )
            self._volume_mounts.append(
                VolumeMount(
                    name="run-logs", mount_path=self._container_log_location
                )
            )

        if self._context.catalog_handler.service_name == "file-system":
            self._volumes.append(
                HostPathVolume(
                    name="catalog",
                    host_path=HostPath(path=self.mini_k8s_catalog_location),
                )
            )
            self._volume_mounts.append(
                VolumeMount(
                    name="catalog", mount_path=self._container_catalog_location
                )
            )
400
+
401
+
402
+ class K8sJobExecutor(GenericK8sJobExecutor):
403
+ service_name: str = "k8s-job"
404
+ config_path: Optional[str] = None
405
+ job_spec: Spec
406
+ mock: bool = False
407
+ pvc_claim_name: str
408
+
409
+ # change the spec to pull image if not present
410
+ def model_post_init(self, __context):
411
+ self.job_spec.template.spec.container.image_pull_policy = ImagePullPolicy.ALWAYS
412
+
413
+ _is_local: bool = PrivateAttr(default=False)
414
+
415
+ model_config = ConfigDict(
416
+ alias_generator=to_camel,
417
+ populate_by_name=True,
418
+ from_attributes=True,
419
+ )
420
+
421
+ def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
422
+ self._use_volumes()
423
+ self._set_up_run_log()
424
+
425
+ job_log = self._context.run_log_store.create_job_log()
426
+ self._context.run_log_store.add_job_log(
427
+ run_id=self._context.run_id, job_log=job_log
428
+ )
429
+
430
+ job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)
431
+
432
+ attempt_log = job.execute_command(
433
+ attempt_number=self.step_attempt_number,
434
+ mock=self.mock,
435
+ )
436
+
437
+ job_log.status = attempt_log.status
438
+ job_log.attempts.append(attempt_log)
439
+
440
+ data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
441
+ catalog_settings=catalog_settings
442
+ )
443
+ logger.debug(f"data_catalogs_put: {data_catalogs_put}")
444
+
445
+ job_log.add_data_catalogs(data_catalogs_put or [])
446
+
447
+ console.print("Summary of job")
448
+ console.print(job_log.get_summary())
449
+
450
+ self._context.run_log_store.add_job_log(
451
+ run_id=self._context.run_id, job_log=job_log
452
+ )
453
+
454
+ def _create_volumes(self):
455
+ self._volumes.append(
456
+ PVCVolume(
457
+ name=self.pvc_claim_name,
458
+ persistent_volume_claim=PVCClaim(claim_name=self.pvc_claim_name),
459
+ )
460
+ )
461
+ match self._context.run_log_store.service_name:
462
+ case "file-system":
463
+ self._volume_mounts.append(
464
+ VolumeMount(
465
+ name=self.pvc_claim_name,
466
+ mount_path=self._container_log_location,
467
+ )
468
+ )
469
+ case "chunked-fs":
470
+ self._volume_mounts.append(
471
+ VolumeMount(
472
+ name=self.pvc_claim_name,
473
+ mount_path=self._container_log_location,
474
+ )
475
+ )
476
+
477
+ match self._context.catalog_handler.service_name:
478
+ case "file-system":
479
+ self._volume_mounts.append(
480
+ VolumeMount(
481
+ name=self.pvc_claim_name,
482
+ mount_path=self._container_catalog_location,
483
+ )
484
+ )
@@ -0,0 +1,37 @@
1
+ # Follow this as a template: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1JobSpec.md
2
+
3
+ jobSpec:
4
+ activeDeadlineSeconds: Optional[int]
5
+ selector: Optional[LabelSelector]
6
+ ttlSecondsAfterFinished: Optional[int]
7
+ template:
8
+ metadata:
9
+ annotations: Optional[Dict[str, str]]
10
+ generate_name: Optional[str] = run_id
11
+ namespace: Optional[str] = "default"
12
+ spec:
13
+ activeDeadlineSeconds: Optional[int]
14
+ nodeSelector: Optional[Dict[str, str]]
15
+ tolerations: Optional[List[Toleration]]
16
+ volumes: Optional[List[str]]
17
+ serviceAccountName: Optional[str]
18
+ restartPolicy: Optional[str] = Choose from [Always, OnFailure, Never]
19
+ container:
20
+ command: List[str]
21
+ env:
22
+ - name: str
23
+ value: str
24
+ image: str
25
+ imagePullPolicy: Optional[str] = choose from [Always, Never, IfNotPresent]
26
+ resources:
27
+ limits:
28
+ cpu: str
29
+ memory: str
30
+ gpu: str
31
+ requests:
32
+ cpu: str
33
+ memory: str
34
+ gpu: str
35
+ volumeMounts:
36
+ - name: str
37
+ mountPath: str
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from typing import List, Optional
3
+
4
+ from extensions.job_executor import GenericJobExecutor
5
+ from runnable import console, defaults
6
+ from runnable.datastore import DataCatalog
7
+ from runnable.tasks import BaseTaskType
8
+
9
# Module-level logger, named after the runnable package's logger so records
# flow through the package's logging configuration.
logger = logging.getLogger(defaults.LOGGER_NAME)
10
+
11
+
12
class LocalJobExecutor(GenericJobExecutor):
    """
    The LocalJobExecutor is a job executor that runs the job locally,
    in the current process.
    """

    service_name: str = "local"
    mock: bool = False  # when True, the task command is not actually run

    def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]] = None):
        """
        This method gets invoked by the CLI.

        Fix: the original signature was ``catalog_settings=Optional[List[str]]``,
        which made the *default value* the typing object itself rather than
        annotating the parameter; it now defaults to None.
        """
        self._set_up_run_log()

        job_log = self._context.run_log_store.create_job_log()
        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )

        # Local execution happens in-process, immediately after submission.
        self.execute_job(job, catalog_settings=catalog_settings)

    def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]] = None):
        """
        Focuses on execution of the job: runs the task, records the attempt,
        syncs the catalog and persists the job log.

        Same signature fix as submit_job: catalog_settings defaults to None,
        so _sync_catalog receives either a real list or None instead of the
        typing object Optional[List[str]].
        """
        logger.info("Trying to execute job")

        job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)

        attempt_log = job.execute_command(
            attempt_number=self.step_attempt_number,
            mock=self.mock,
        )

        job_log.status = attempt_log.status
        job_log.attempts.append(attempt_log)

        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
            catalog_settings=catalog_settings
        )
        logger.debug(f"data_catalogs_put: {data_catalogs_put}")

        job_log.add_data_catalogs(data_catalogs_put or [])

        console.print("Summary of job")
        console.print(job_log.get_summary())

        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )