runnable 0.13.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +1 -12
- runnable/catalog.py +29 -5
- runnable/cli.py +268 -215
- runnable/context.py +10 -3
- runnable/datastore.py +212 -53
- runnable/defaults.py +13 -55
- runnable/entrypoints.py +270 -183
- runnable/exceptions.py +28 -2
- runnable/executor.py +133 -86
- runnable/graph.py +37 -13
- runnable/nodes.py +50 -22
- runnable/parameters.py +27 -8
- runnable/pickler.py +1 -1
- runnable/sdk.py +230 -66
- runnable/secrets.py +3 -1
- runnable/tasks.py +99 -41
- runnable/utils.py +59 -39
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/METADATA +28 -31
- runnable-0.16.0.dist-info/RECORD +23 -0
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +1 -1
- runnable-0.16.0.dist-info/entry_points.txt +45 -0
- runnable/extensions/__init__.py +0 -0
- runnable/extensions/catalog/__init__.py +0 -21
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +0 -234
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
- runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
- runnable/extensions/executor/__init__.py +0 -649
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +0 -1194
- runnable/extensions/executor/argo/specification.yaml +0 -51
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
- runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
- runnable/extensions/executor/local.py +0 -69
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +0 -446
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +0 -154
- runnable/extensions/executor/retry/__init__.py +0 -0
- runnable/extensions/executor/retry/implementation.py +0 -168
- runnable/extensions/nodes.py +0 -870
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
- runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +0 -140
- runnable/extensions/run_log_store/generic_chunked.py +0 -557
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +0 -100
- runnable/integration.py +0 -192
- runnable-0.13.0.dist-info/RECORD +0 -63
- runnable-0.13.0.dist-info/entry_points.txt +0 -41
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info/licenses}/LICENSE +0 -0
runnable/datastore.py
CHANGED
@@ -18,14 +18,13 @@ from typing import (
|
|
18
18
|
|
19
19
|
from pydantic import BaseModel, Field, computed_field
|
20
20
|
|
21
|
-
import runnable.context as context
|
22
21
|
from runnable import defaults, exceptions
|
23
22
|
|
24
23
|
logger = logging.getLogger(defaults.LOGGER_NAME)
|
25
24
|
|
26
25
|
|
27
26
|
JSONType = Union[
|
28
|
-
|
27
|
+
Union[None, bool, str, float, int, List[Any], Dict[str, Any]]
|
29
28
|
] # This is actually JSONType, but pydantic doesn't support TypeAlias yet
|
30
29
|
|
31
30
|
|
@@ -56,14 +55,11 @@ class DataCatalog(BaseModel, extra="allow"):
|
|
56
55
|
return other.name == self.name
|
57
56
|
|
58
57
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
Once the map state is complete, we can set the reduce to true and have the value as
|
65
|
-
the reduced value. Its either a list or a custom function return.
|
66
|
-
"""
|
58
|
+
# The theory behind reduced:
|
59
|
+
# parameters returned by steps in map node are only reduced by the end of the map step, fan-in.
|
60
|
+
# If they are accessed within the map step, the value should be the value returned by the step in the map step.
|
61
|
+
# Once the map state is complete, we can set the reduce to true and have the value as
|
62
|
+
# the reduced value. Its either a list or a custom function return.
|
67
63
|
|
68
64
|
|
69
65
|
class JsonParameter(BaseModel):
|
@@ -125,7 +121,9 @@ class ObjectParameter(BaseModel):
|
|
125
121
|
os.remove(self.file_name) # Remove after loading
|
126
122
|
|
127
123
|
|
128
|
-
Parameter = Annotated[
|
124
|
+
Parameter = Annotated[
|
125
|
+
Union[JsonParameter, ObjectParameter, MetricParameter], Field(discriminator="kind")
|
126
|
+
]
|
129
127
|
|
130
128
|
|
131
129
|
class StepAttempt(BaseModel):
|
@@ -157,8 +155,12 @@ class CodeIdentity(BaseModel, extra="allow"):
|
|
157
155
|
|
158
156
|
code_identifier: Optional[str] = "" # GIT sha code or docker image id
|
159
157
|
code_identifier_type: Optional[str] = "" # git or docker
|
160
|
-
code_identifier_dependable: Optional[bool] =
|
161
|
-
|
158
|
+
code_identifier_dependable: Optional[bool] = (
|
159
|
+
False # If git, checks if the tree is clean.
|
160
|
+
)
|
161
|
+
code_identifier_url: Optional[str] = (
|
162
|
+
"" # The git remote url or docker repository url
|
163
|
+
)
|
162
164
|
code_identifier_message: Optional[str] = "" # Any optional message
|
163
165
|
|
164
166
|
|
@@ -185,18 +187,28 @@ class StepLog(BaseModel):
|
|
185
187
|
summary: Dict[str, Any] = {}
|
186
188
|
|
187
189
|
summary["Name"] = self.internal_name
|
188
|
-
summary["Input catalog content"] = [
|
190
|
+
summary["Input catalog content"] = [
|
191
|
+
dc.name for dc in self.data_catalog if dc.stage == "get"
|
192
|
+
]
|
189
193
|
summary["Available parameters"] = [
|
190
|
-
(p, v.description)
|
194
|
+
(p, v.description)
|
195
|
+
for attempt in self.attempts
|
196
|
+
for p, v in attempt.input_parameters.items()
|
191
197
|
]
|
192
198
|
|
193
|
-
summary["Output catalog content"] = [
|
199
|
+
summary["Output catalog content"] = [
|
200
|
+
dc.name for dc in self.data_catalog if dc.stage == "put"
|
201
|
+
]
|
194
202
|
summary["Output parameters"] = [
|
195
|
-
(p, v.description)
|
203
|
+
(p, v.description)
|
204
|
+
for attempt in self.attempts
|
205
|
+
for p, v in attempt.output_parameters.items()
|
196
206
|
]
|
197
207
|
|
198
208
|
summary["Metrics"] = [
|
199
|
-
(p, v.description)
|
209
|
+
(p, v.description)
|
210
|
+
for attempt in self.attempts
|
211
|
+
for p, v in attempt.user_defined_metrics.items()
|
200
212
|
]
|
201
213
|
|
202
214
|
cis = []
|
@@ -288,6 +300,73 @@ class BranchLog(BaseModel):
|
|
288
300
|
StepLog.model_rebuild()
|
289
301
|
|
290
302
|
|
303
|
+
class JobLog(BaseModel):
|
304
|
+
"""
|
305
|
+
The data class capturing the data of a job
|
306
|
+
This should be treated as a step log
|
307
|
+
"""
|
308
|
+
|
309
|
+
status: str = defaults.FAIL
|
310
|
+
message: str = ""
|
311
|
+
mock: bool = False
|
312
|
+
code_identities: List[CodeIdentity] = Field(default_factory=list)
|
313
|
+
attempts: List[StepAttempt] = Field(default_factory=list)
|
314
|
+
data_catalog: List[DataCatalog] = Field(default_factory=list)
|
315
|
+
|
316
|
+
def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
|
317
|
+
"""
|
318
|
+
Add the data catalogs as asked by the user
|
319
|
+
|
320
|
+
Args:
|
321
|
+
dict_catalogs ([DataCatalog]): A list of data catalog items
|
322
|
+
"""
|
323
|
+
|
324
|
+
if not self.data_catalog:
|
325
|
+
self.data_catalog = []
|
326
|
+
for data_catalog in data_catalogs:
|
327
|
+
self.data_catalog.append(data_catalog)
|
328
|
+
|
329
|
+
def get_summary(self) -> Dict[str, Any]:
|
330
|
+
"""
|
331
|
+
Summarize the step log to log
|
332
|
+
"""
|
333
|
+
summary: Dict[str, Any] = {}
|
334
|
+
|
335
|
+
summary["Available parameters"] = [
|
336
|
+
(p, v.description)
|
337
|
+
for attempt in self.attempts
|
338
|
+
for p, v in attempt.input_parameters.items()
|
339
|
+
]
|
340
|
+
|
341
|
+
summary["Output catalog content"] = [
|
342
|
+
dc.name for dc in self.data_catalog if dc.stage == "put"
|
343
|
+
]
|
344
|
+
summary["Output parameters"] = [
|
345
|
+
(p, v.description)
|
346
|
+
for attempt in self.attempts
|
347
|
+
for p, v in attempt.output_parameters.items()
|
348
|
+
]
|
349
|
+
|
350
|
+
summary["Metrics"] = [
|
351
|
+
(p, v.description)
|
352
|
+
for attempt in self.attempts
|
353
|
+
for p, v in attempt.user_defined_metrics.items()
|
354
|
+
]
|
355
|
+
|
356
|
+
cis = []
|
357
|
+
for ci in self.code_identities:
|
358
|
+
message = f"{ci.code_identifier_type}:{ci.code_identifier}"
|
359
|
+
if not ci.code_identifier_dependable:
|
360
|
+
message += " but is not dependable"
|
361
|
+
cis.append(message)
|
362
|
+
|
363
|
+
summary["Code identities"] = cis
|
364
|
+
|
365
|
+
summary["status"] = self.status
|
366
|
+
|
367
|
+
return summary
|
368
|
+
|
369
|
+
|
291
370
|
class RunLog(BaseModel):
|
292
371
|
"""
|
293
372
|
The data captured as part of Run Log
|
@@ -298,6 +377,7 @@ class RunLog(BaseModel):
|
|
298
377
|
tag: Optional[str] = ""
|
299
378
|
status: str = defaults.FAIL
|
300
379
|
steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
|
380
|
+
job: Optional[JobLog] = None
|
301
381
|
parameters: Dict[str, Parameter] = Field(default_factory=dict)
|
302
382
|
run_config: Dict[str, Any] = Field(default_factory=dict)
|
303
383
|
|
@@ -312,10 +392,18 @@ class RunLog(BaseModel):
|
|
312
392
|
summary["Catalog Location"] = _context.catalog_handler.get_summary()
|
313
393
|
summary["Full Run log present at: "] = _context.run_log_store.get_summary()
|
314
394
|
|
315
|
-
run_log = _context.run_log_store.get_run_log_by_id(
|
395
|
+
run_log = _context.run_log_store.get_run_log_by_id(
|
396
|
+
run_id=_context.run_id, full=True
|
397
|
+
)
|
316
398
|
|
317
|
-
summary["Final Parameters"] = {
|
318
|
-
|
399
|
+
summary["Final Parameters"] = {
|
400
|
+
p: v.description for p, v in run_log.parameters.items()
|
401
|
+
}
|
402
|
+
summary["Collected metrics"] = {
|
403
|
+
p: v.description
|
404
|
+
for p, v in run_log.parameters.items()
|
405
|
+
if v.kind == "metric"
|
406
|
+
}
|
319
407
|
|
320
408
|
return summary
|
321
409
|
|
@@ -338,7 +426,9 @@ class RunLog(BaseModel):
|
|
338
426
|
|
339
427
|
return list(set(data_catalogs))
|
340
428
|
|
341
|
-
def search_branch_by_internal_name(
|
429
|
+
def search_branch_by_internal_name(
|
430
|
+
self, i_name: str
|
431
|
+
) -> Tuple[Union[BranchLog, RunLog], Union[StepLog, None]]:
|
342
432
|
"""
|
343
433
|
Given a branch internal name, search for it in the run log.
|
344
434
|
|
@@ -385,7 +475,9 @@ class RunLog(BaseModel):
|
|
385
475
|
|
386
476
|
raise exceptions.BranchLogNotFoundError(self.run_id, i_name)
|
387
477
|
|
388
|
-
def search_step_by_internal_name(
|
478
|
+
def search_step_by_internal_name(
|
479
|
+
self, i_name: str
|
480
|
+
) -> Tuple[StepLog, Union[BranchLog, None]]:
|
389
481
|
"""
|
390
482
|
Given a steps internal name, search for the step name.
|
391
483
|
|
@@ -415,7 +507,9 @@ class RunLog(BaseModel):
|
|
415
507
|
# Its odd, so we are in brach name
|
416
508
|
current_branch = current_step.branches[".".join(dot_path[: i + 1])] # type: ignore
|
417
509
|
current_steps = current_branch.steps
|
418
|
-
logger.debug(
|
510
|
+
logger.debug(
|
511
|
+
f"Finding step log for {i_name} in branch: {current_branch}"
|
512
|
+
)
|
419
513
|
else:
|
420
514
|
# Its even, so we are in step, we start here!
|
421
515
|
current_step = current_steps[".".join(dot_path[: i + 1])]
|
@@ -428,10 +522,6 @@ class RunLog(BaseModel):
|
|
428
522
|
raise exceptions.StepLogNotFoundError(self.run_id, i_name)
|
429
523
|
|
430
524
|
|
431
|
-
# All outside modules should interact with dataclasses using the RunLogStore to promote extensibility
|
432
|
-
# If you want to customize dataclass, extend BaseRunLogStore and implement the methods as per the specification
|
433
|
-
|
434
|
-
|
435
525
|
class BaseRunLogStore(ABC, BaseModel):
|
436
526
|
"""
|
437
527
|
The base class of a Run Log Store with many common methods implemented.
|
@@ -441,13 +531,29 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
441
531
|
service_type: str = "run_log_store"
|
442
532
|
|
443
533
|
@abstractmethod
|
444
|
-
def get_summary(self) -> Dict[str, Any]:
|
445
|
-
...
|
534
|
+
def get_summary(self) -> Dict[str, Any]: ...
|
446
535
|
|
447
536
|
@property
|
448
537
|
def _context(self):
|
449
538
|
return context.run_context
|
450
539
|
|
540
|
+
"""
|
541
|
+
Retrieves a Job log from the database using the config and the job_id
|
542
|
+
|
543
|
+
Args:
|
544
|
+
job_id (str): The job_id of the job
|
545
|
+
|
546
|
+
Returns:
|
547
|
+
JobLog: The JobLog object identified by the job_id
|
548
|
+
|
549
|
+
Logically the method should:
|
550
|
+
* Returns the job_log defined by id from the data store defined by the config
|
551
|
+
|
552
|
+
Raises:
|
553
|
+
NotImplementedError: This is a base class and therefore has no default implementation
|
554
|
+
JobLogNotFoundError: If the job log for job_id is not found in the datastore
|
555
|
+
"""
|
556
|
+
|
451
557
|
@abstractmethod
|
452
558
|
def create_run_log(
|
453
559
|
self,
|
@@ -457,7 +563,6 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
457
563
|
tag: str = "",
|
458
564
|
original_run_id: str = "",
|
459
565
|
status: str = defaults.CREATED,
|
460
|
-
**kwargs,
|
461
566
|
):
|
462
567
|
"""
|
463
568
|
Creates a Run Log object by using the config
|
@@ -473,7 +578,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
473
578
|
raise NotImplementedError
|
474
579
|
|
475
580
|
@abstractmethod
|
476
|
-
def get_run_log_by_id(self, run_id: str, full: bool = False
|
581
|
+
def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
|
477
582
|
"""
|
478
583
|
Retrieves a Run log from the database using the config and the run_id
|
479
584
|
|
@@ -495,7 +600,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
495
600
|
raise NotImplementedError
|
496
601
|
|
497
602
|
@abstractmethod
|
498
|
-
def put_run_log(self, run_log: RunLog
|
603
|
+
def put_run_log(self, run_log: RunLog):
|
499
604
|
"""
|
500
605
|
Puts the Run Log in the database as defined by the config
|
501
606
|
|
@@ -523,7 +628,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
523
628
|
run_log.status = status
|
524
629
|
self.put_run_log(run_log)
|
525
630
|
|
526
|
-
def get_parameters(self, run_id: str
|
631
|
+
def get_parameters(self, run_id: str) -> Dict[str, Parameter]:
|
527
632
|
"""
|
528
633
|
Get the parameters from the Run log defined by the run_id
|
529
634
|
|
@@ -542,7 +647,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
542
647
|
run_log = self.get_run_log_by_id(run_id=run_id)
|
543
648
|
return run_log.parameters
|
544
649
|
|
545
|
-
def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]
|
650
|
+
def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]):
|
546
651
|
"""
|
547
652
|
Update the parameters of the Run log with the new parameters
|
548
653
|
|
@@ -563,7 +668,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
563
668
|
run_log.parameters.update(parameters)
|
564
669
|
self.put_run_log(run_log=run_log)
|
565
670
|
|
566
|
-
def get_run_config(self, run_id: str
|
671
|
+
def get_run_config(self, run_id: str) -> dict:
|
567
672
|
"""
|
568
673
|
Given a run_id, return the run_config used to perform the run.
|
569
674
|
|
@@ -577,7 +682,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
577
682
|
run_log = self.get_run_log_by_id(run_id=run_id)
|
578
683
|
return run_log.run_config
|
579
684
|
|
580
|
-
def set_run_config(self, run_id: str, run_config: dict
|
685
|
+
def set_run_config(self, run_id: str, run_config: dict):
|
581
686
|
"""Set the run config used to run the run_id
|
582
687
|
|
583
688
|
Args:
|
@@ -589,7 +694,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
589
694
|
run_log.run_config.update(run_config)
|
590
695
|
self.put_run_log(run_log=run_log)
|
591
696
|
|
592
|
-
def create_step_log(self, name: str, internal_name: str
|
697
|
+
def create_step_log(self, name: str, internal_name: str):
|
593
698
|
"""
|
594
699
|
Create a step log by the name and internal name
|
595
700
|
|
@@ -607,7 +712,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
607
712
|
logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
|
608
713
|
return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)
|
609
714
|
|
610
|
-
def get_step_log(self, internal_name: str, run_id: str
|
715
|
+
def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
|
611
716
|
"""
|
612
717
|
Get a step log from the datastore for run_id and the internal naming of the step log
|
613
718
|
|
@@ -629,12 +734,14 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
629
734
|
RunLogNotFoundError: If the run log for run_id is not found in the datastore
|
630
735
|
StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id
|
631
736
|
"""
|
632
|
-
logger.info(
|
737
|
+
logger.info(
|
738
|
+
f"{self.service_name} Getting the step log: {internal_name} of {run_id}"
|
739
|
+
)
|
633
740
|
run_log = self.get_run_log_by_id(run_id=run_id)
|
634
741
|
step_log, _ = run_log.search_step_by_internal_name(internal_name)
|
635
742
|
return step_log
|
636
743
|
|
637
|
-
def add_step_log(self, step_log: StepLog, run_id: str
|
744
|
+
def add_step_log(self, step_log: StepLog, run_id: str):
|
638
745
|
"""
|
639
746
|
Add the step log in the run log as identified by the run_id in the datastore
|
640
747
|
|
@@ -664,7 +771,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
664
771
|
branch.steps[step_log.internal_name] = step_log
|
665
772
|
self.put_run_log(run_log=run_log)
|
666
773
|
|
667
|
-
def create_branch_log(self, internal_branch_name: str
|
774
|
+
def create_branch_log(self, internal_branch_name: str) -> BranchLog:
|
668
775
|
"""
|
669
776
|
Creates a uncommitted branch log object by the internal name given
|
670
777
|
|
@@ -675,10 +782,14 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
675
782
|
BranchLog: Uncommitted and initialized with defaults BranchLog object
|
676
783
|
"""
|
677
784
|
# Create a new BranchLog
|
678
|
-
logger.info(
|
785
|
+
logger.info(
|
786
|
+
f"{self.service_name} Creating a Branch Log : {internal_branch_name}"
|
787
|
+
)
|
679
788
|
return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)
|
680
789
|
|
681
|
-
def get_branch_log(
|
790
|
+
def get_branch_log(
|
791
|
+
self, internal_branch_name: str, run_id: str
|
792
|
+
) -> Union[BranchLog, RunLog]:
|
682
793
|
"""
|
683
794
|
Returns the branch log by the internal branch name for the run id
|
684
795
|
|
@@ -697,7 +808,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
697
808
|
branch, _ = run_log.search_branch_by_internal_name(internal_branch_name)
|
698
809
|
return branch
|
699
810
|
|
700
|
-
def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str
|
811
|
+
def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str):
|
701
812
|
"""
|
702
813
|
The method should:
|
703
814
|
# Get the run log
|
@@ -729,8 +840,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
729
840
|
step.branches[internal_branch_name] = branch_log # type: ignore
|
730
841
|
self.put_run_log(run_log)
|
731
842
|
|
732
|
-
|
733
|
-
def create_code_identity(self, **kwargs) -> CodeIdentity:
|
843
|
+
def create_code_identity(self) -> CodeIdentity:
|
734
844
|
"""
|
735
845
|
Creates an uncommitted Code identity class
|
736
846
|
|
@@ -740,7 +850,7 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
740
850
|
logger.info(f"{self.service_name} Creating Code identity")
|
741
851
|
return CodeIdentity()
|
742
852
|
|
743
|
-
def create_data_catalog(self, name: str
|
853
|
+
def create_data_catalog(self, name: str) -> DataCatalog:
|
744
854
|
"""
|
745
855
|
Create a uncommitted data catalog object
|
746
856
|
|
@@ -753,6 +863,45 @@ class BaseRunLogStore(ABC, BaseModel):
|
|
753
863
|
logger.info(f"{self.service_name} Creating Data Catalog for {name}")
|
754
864
|
return DataCatalog(name=name)
|
755
865
|
|
866
|
+
def create_job_log(self) -> JobLog:
|
867
|
+
"""
|
868
|
+
Creates a Job log and adds it to the db
|
869
|
+
|
870
|
+
Refer to BaseRunLogStore.create_job_log
|
871
|
+
"""
|
872
|
+
logger.info(f"{self.service_name} Creating a Job Log and adding it to DB")
|
873
|
+
return JobLog(status=defaults.CREATED)
|
874
|
+
|
875
|
+
def get_job_log(self, run_id: str) -> JobLog:
|
876
|
+
"""
|
877
|
+
Returns the run_log defined by id
|
878
|
+
|
879
|
+
Raises Exception if not found
|
880
|
+
"""
|
881
|
+
logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
|
882
|
+
run_log = self.get_run_log_by_id(run_id)
|
883
|
+
|
884
|
+
try:
|
885
|
+
assert run_log.job
|
886
|
+
except AssertionError as exc:
|
887
|
+
raise exceptions.JobLogNotFoundError(run_id) from exc
|
888
|
+
|
889
|
+
return run_log.job
|
890
|
+
|
891
|
+
def add_job_log(self, run_id: str, job_log: JobLog):
|
892
|
+
"""
|
893
|
+
Adds the job log to the run log
|
894
|
+
|
895
|
+
Args:
|
896
|
+
run_id (str): The run_id of the run
|
897
|
+
job_log (JobLog): The job log to add to the run log
|
898
|
+
"""
|
899
|
+
logger.info(f"{self.service_name} Adding the job log to DB for: {run_id}")
|
900
|
+
run_log = self.get_run_log_by_id(run_id=run_id)
|
901
|
+
run_log.job = job_log
|
902
|
+
run_log.status = job_log.status
|
903
|
+
self.put_run_log(run_log=run_log)
|
904
|
+
|
756
905
|
|
757
906
|
class BufferRunLogstore(BaseRunLogStore):
|
758
907
|
"""
|
@@ -775,7 +924,13 @@ class BufferRunLogstore(BaseRunLogStore):
|
|
775
924
|
"""
|
776
925
|
|
777
926
|
service_name: str = "buffered"
|
778
|
-
|
927
|
+
|
928
|
+
run_log: Optional[RunLog] = Field(
|
929
|
+
default=None, exclude=True
|
930
|
+
) # For a buffered Run Log, this is the database
|
931
|
+
job_log: Optional[JobLog] = Field(
|
932
|
+
default=None, exclude=True
|
933
|
+
) # For a buffered Run Log, this is the database
|
779
934
|
|
780
935
|
def get_summary(self) -> Dict[str, Any]:
|
781
936
|
summary = {"Type": self.service_name, "Location": "Not persisted"}
|
@@ -790,7 +945,6 @@ class BufferRunLogstore(BaseRunLogStore):
|
|
790
945
|
tag: str = "",
|
791
946
|
original_run_id: str = "",
|
792
947
|
status: str = defaults.CREATED,
|
793
|
-
**kwargs,
|
794
948
|
) -> RunLog:
|
795
949
|
"""
|
796
950
|
# Creates a Run log
|
@@ -809,7 +963,7 @@ class BufferRunLogstore(BaseRunLogStore):
|
|
809
963
|
)
|
810
964
|
return self.run_log
|
811
965
|
|
812
|
-
def get_run_log_by_id(self, run_id: str, full: bool = False
|
966
|
+
def get_run_log_by_id(self, run_id: str, full: bool = False):
|
813
967
|
"""
|
814
968
|
# Returns the run_log defined by id
|
815
969
|
# Raises Exception if not found
|
@@ -821,10 +975,15 @@ class BufferRunLogstore(BaseRunLogStore):
|
|
821
975
|
|
822
976
|
raise exceptions.RunLogNotFoundError(run_id)
|
823
977
|
|
824
|
-
def put_run_log(self, run_log: RunLog
|
978
|
+
def put_run_log(self, run_log: RunLog):
|
825
979
|
"""
|
826
980
|
# Puts the run log in the db
|
827
981
|
# Raises Exception if not found
|
828
982
|
"""
|
829
|
-
logger.info(
|
983
|
+
logger.info(
|
984
|
+
f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
|
985
|
+
)
|
830
986
|
self.run_log = run_log
|
987
|
+
|
988
|
+
|
989
|
+
import runnable.context as context # noqa: F401, E402
|
runnable/defaults.py
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
-
from
|
2
|
-
|
3
|
-
|
1
|
+
from typing import (
|
2
|
+
Any,
|
3
|
+
Dict,
|
4
|
+
Mapping,
|
5
|
+
Optional,
|
6
|
+
TypedDict, # type: ignore[unused-ignore]
|
7
|
+
Union,
|
8
|
+
)
|
4
9
|
|
5
10
|
from rich.style import Style
|
6
11
|
from typing_extensions import TypeAlias
|
@@ -12,16 +17,6 @@ LOGGER_NAME = "runnable"
|
|
12
17
|
LOG_LEVEL = "WARNING"
|
13
18
|
|
14
19
|
|
15
|
-
class EXECUTION_PLAN(Enum):
|
16
|
-
"""
|
17
|
-
The possible execution plans for a runnable job.
|
18
|
-
"""
|
19
|
-
|
20
|
-
CHAINED = "chained" # 121 relationship between run log and the dag.
|
21
|
-
UNCHAINED = "unchained" # Only captures execution of steps, no relation.
|
22
|
-
INTERACTIVE = "interactive" # used for interactive sessions
|
23
|
-
|
24
|
-
|
25
20
|
# Type definitions
|
26
21
|
class ServiceConfig(TypedDict):
|
27
22
|
type: str
|
@@ -32,8 +27,7 @@ class RunnableConfig(TypedDict, total=False):
|
|
32
27
|
run_log_store: Optional[ServiceConfig]
|
33
28
|
secrets: Optional[ServiceConfig]
|
34
29
|
catalog: Optional[ServiceConfig]
|
35
|
-
|
36
|
-
experiment_tracker: Optional[ServiceConfig]
|
30
|
+
pipeline_executor: Optional[ServiceConfig]
|
37
31
|
pickler: Optional[ServiceConfig]
|
38
32
|
|
39
33
|
|
@@ -46,7 +40,6 @@ RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
|
|
46
40
|
|
47
41
|
# Interaction settings
|
48
42
|
TRACK_PREFIX = "RUNNABLE_TRACK_"
|
49
|
-
STEP_INDICATOR = "_STEP_"
|
50
43
|
PARAMETER_PREFIX = "RUNNABLE_PRM_"
|
51
44
|
MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
|
52
45
|
VARIABLE_PREFIX = "RUNNABLE_VAR_"
|
@@ -67,18 +60,14 @@ TRIGGERED = "TRIGGERED"
|
|
67
60
|
|
68
61
|
# Node and Command settings
|
69
62
|
COMMAND_TYPE = "python"
|
70
|
-
NODE_SPEC_FILE = "node_spec.yaml"
|
71
63
|
COMMAND_FRIENDLY_CHARACTER = "%"
|
72
|
-
DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
|
73
|
-
DEFAULT_CONTAINER_DATA_PATH = "data/"
|
74
|
-
DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
|
75
64
|
|
76
65
|
# Default services
|
77
|
-
|
66
|
+
DEFAULT_PIPELINE_EXECUTOR = ServiceConfig(type="local", config={})
|
67
|
+
DEFAULT_JOB_EXECUTOR = ServiceConfig(type="local", config={})
|
78
68
|
DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
|
79
69
|
DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
|
80
70
|
DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
|
81
|
-
DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
|
82
71
|
DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})
|
83
72
|
|
84
73
|
# Map state
|
@@ -110,41 +99,10 @@ COMPUTE_DATA_FOLDER = "."
|
|
110
99
|
# Secrets settings
|
111
100
|
DOTENV_FILE_LOCATION = ".env"
|
112
101
|
|
113
|
-
|
114
|
-
# Docker settings
|
115
|
-
DOCKERFILE_NAME = "Dockerfile"
|
116
|
-
DOCKERFILE_CONTENT = r"""# Python 3.8 Image without Dependecies
|
117
|
-
FROM python:3.8
|
118
|
-
|
119
|
-
LABEL maintainer="mesanthu@gmail.com"
|
120
|
-
|
121
|
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
122
|
-
git \
|
123
|
-
&& rm -rf /var/lib/apt/lists/*
|
124
|
-
|
125
|
-
${INSTALL_STYLE}
|
126
|
-
|
127
|
-
ENV VIRTUAL_ENV=/opt/venv
|
128
|
-
RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV
|
129
|
-
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
130
|
-
|
131
|
-
${COPY_CONTENT}
|
132
|
-
WORKDIR /app
|
133
|
-
|
134
|
-
${INSTALL_REQUIREMENTS}
|
135
|
-
"""
|
136
|
-
GIT_ARCHIVE_NAME = "git_tracked"
|
137
102
|
LEN_SHA_FOR_TAG = 8
|
138
103
|
|
139
|
-
|
140
|
-
|
141
|
-
"""
|
142
|
-
The possible container entrypoint types.
|
143
|
-
"""
|
144
|
-
|
145
|
-
USER = "user"
|
146
|
-
SYSTEM = "system"
|
147
|
-
|
104
|
+
# JOB CONFIG
|
105
|
+
DEFAULT_JOB_NAME = "job"
|
148
106
|
|
149
107
|
## Logging settings
|
150
108
|
|