runnable-0.13.0-py3-none-any.whl → runnable-0.16.0-py3-none-any.whl
- runnable/__init__.py +1 -12
- runnable/catalog.py +29 -5
- runnable/cli.py +268 -215
- runnable/context.py +10 -3
- runnable/datastore.py +212 -53
- runnable/defaults.py +13 -55
- runnable/entrypoints.py +270 -183
- runnable/exceptions.py +28 -2
- runnable/executor.py +133 -86
- runnable/graph.py +37 -13
- runnable/nodes.py +50 -22
- runnable/parameters.py +27 -8
- runnable/pickler.py +1 -1
- runnable/sdk.py +230 -66
- runnable/secrets.py +3 -1
- runnable/tasks.py +99 -41
- runnable/utils.py +59 -39
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/METADATA +28 -31
- runnable-0.16.0.dist-info/RECORD +23 -0
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +1 -1
- runnable-0.16.0.dist-info/entry_points.txt +45 -0
- runnable/extensions/__init__.py +0 -0
- runnable/extensions/catalog/__init__.py +0 -21
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +0 -234
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
- runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
- runnable/extensions/executor/__init__.py +0 -649
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +0 -1194
- runnable/extensions/executor/argo/specification.yaml +0 -51
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
- runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
- runnable/extensions/executor/local.py +0 -69
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +0 -446
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +0 -154
- runnable/extensions/executor/retry/__init__.py +0 -0
- runnable/extensions/executor/retry/implementation.py +0 -168
- runnable/extensions/nodes.py +0 -870
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
- runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +0 -140
- runnable/extensions/run_log_store/generic_chunked.py +0 -557
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +0 -100
- runnable/integration.py +0 -192
- runnable-0.13.0.dist-info/RECORD +0 -63
- runnable-0.13.0.dist-info/entry_points.txt +0 -41
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info/licenses}/LICENSE +0 -0
runnable/datastore.py
CHANGED
@@ -18,14 +18,13 @@ from typing import (
 
 from pydantic import BaseModel, Field, computed_field
 
-import runnable.context as context
 from runnable import defaults, exceptions
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
 JSONType = Union[
-
+    Union[None, bool, str, float, int, List[Any], Dict[str, Any]]
 ]  # This is actually JSONType, but pydantic doesn't support TypeAlias yet
 
 
@@ -56,14 +55,11 @@ class DataCatalog(BaseModel, extra="allow"):
         return other.name == self.name
 
 
-
-
-
-
-
-Once the map state is complete, we can set the reduce to true and have the value as
-the reduced value. Its either a list or a custom function return.
-"""
+# The theory behind reduced:
+# parameters returned by steps in map node are only reduced by the end of the map step, fan-in.
+# If they are accessed within the map step, the value should be the value returned by the step in the map step.
+# Once the map state is complete, we can set the reduce to true and have the value as
+# the reduced value. Its either a list or a custom function return.
 
 
 class JsonParameter(BaseModel):
@@ -125,7 +121,9 @@ class ObjectParameter(BaseModel):
         os.remove(self.file_name)  # Remove after loading
 
 
-Parameter = Annotated[
+Parameter = Annotated[
+    Union[JsonParameter, ObjectParameter, MetricParameter], Field(discriminator="kind")
+]
 
 
 class StepAttempt(BaseModel):
@@ -157,8 +155,12 @@ class CodeIdentity(BaseModel, extra="allow"):
 
     code_identifier: Optional[str] = ""  # GIT sha code or docker image id
     code_identifier_type: Optional[str] = ""  # git or docker
-    code_identifier_dependable: Optional[bool] =
-
+    code_identifier_dependable: Optional[bool] = (
+        False  # If git, checks if the tree is clean.
+    )
+    code_identifier_url: Optional[str] = (
+        ""  # The git remote url or docker repository url
+    )
     code_identifier_message: Optional[str] = ""  # Any optional message
 
 
@@ -185,18 +187,28 @@ class StepLog(BaseModel):
         summary: Dict[str, Any] = {}
 
         summary["Name"] = self.internal_name
-        summary["Input catalog content"] = [
+        summary["Input catalog content"] = [
+            dc.name for dc in self.data_catalog if dc.stage == "get"
+        ]
         summary["Available parameters"] = [
-            (p, v.description)
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.input_parameters.items()
         ]
 
-        summary["Output catalog content"] = [
+        summary["Output catalog content"] = [
+            dc.name for dc in self.data_catalog if dc.stage == "put"
+        ]
         summary["Output parameters"] = [
-            (p, v.description)
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.output_parameters.items()
         ]
 
         summary["Metrics"] = [
-            (p, v.description)
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.user_defined_metrics.items()
         ]
 
         cis = []
@@ -288,6 +300,73 @@ class BranchLog(BaseModel):
 StepLog.model_rebuild()
 
 
+class JobLog(BaseModel):
+    """
+    The data class capturing the data of a job
+    This should be treated as a step log
+    """
+
+    status: str = defaults.FAIL
+    message: str = ""
+    mock: bool = False
+    code_identities: List[CodeIdentity] = Field(default_factory=list)
+    attempts: List[StepAttempt] = Field(default_factory=list)
+    data_catalog: List[DataCatalog] = Field(default_factory=list)
+
+    def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
+        """
+        Add the data catalogs as asked by the user
+
+        Args:
+            dict_catalogs ([DataCatalog]): A list of data catalog items
+        """
+
+        if not self.data_catalog:
+            self.data_catalog = []
+        for data_catalog in data_catalogs:
+            self.data_catalog.append(data_catalog)
+
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Summarize the step log to log
+        """
+        summary: Dict[str, Any] = {}
+
+        summary["Available parameters"] = [
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.input_parameters.items()
+        ]
+
+        summary["Output catalog content"] = [
+            dc.name for dc in self.data_catalog if dc.stage == "put"
+        ]
+        summary["Output parameters"] = [
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.output_parameters.items()
+        ]
+
+        summary["Metrics"] = [
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.user_defined_metrics.items()
+        ]
+
+        cis = []
+        for ci in self.code_identities:
+            message = f"{ci.code_identifier_type}:{ci.code_identifier}"
+            if not ci.code_identifier_dependable:
+                message += " but is not dependable"
+            cis.append(message)
+
+        summary["Code identities"] = cis
+
+        summary["status"] = self.status
+
+        return summary
+
+
 class RunLog(BaseModel):
     """
     The data captured as part of Run Log
@@ -298,6 +377,7 @@ class RunLog(BaseModel):
     tag: Optional[str] = ""
     status: str = defaults.FAIL
     steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
+    job: Optional[JobLog] = None
    parameters: Dict[str, Parameter] = Field(default_factory=dict)
     run_config: Dict[str, Any] = Field(default_factory=dict)
 
@@ -312,10 +392,18 @@ class RunLog(BaseModel):
         summary["Catalog Location"] = _context.catalog_handler.get_summary()
         summary["Full Run log present at: "] = _context.run_log_store.get_summary()
 
-        run_log = _context.run_log_store.get_run_log_by_id(
+        run_log = _context.run_log_store.get_run_log_by_id(
+            run_id=_context.run_id, full=True
+        )
 
-        summary["Final Parameters"] = {
-
+        summary["Final Parameters"] = {
+            p: v.description for p, v in run_log.parameters.items()
+        }
+        summary["Collected metrics"] = {
+            p: v.description
+            for p, v in run_log.parameters.items()
+            if v.kind == "metric"
+        }
 
         return summary
 
@@ -338,7 +426,9 @@ class RunLog(BaseModel):
 
         return list(set(data_catalogs))
 
-    def search_branch_by_internal_name(
+    def search_branch_by_internal_name(
+        self, i_name: str
+    ) -> Tuple[Union[BranchLog, RunLog], Union[StepLog, None]]:
         """
         Given a branch internal name, search for it in the run log.
 
@@ -385,7 +475,9 @@ class RunLog(BaseModel):
 
         raise exceptions.BranchLogNotFoundError(self.run_id, i_name)
 
-    def search_step_by_internal_name(
+    def search_step_by_internal_name(
+        self, i_name: str
+    ) -> Tuple[StepLog, Union[BranchLog, None]]:
         """
         Given a steps internal name, search for the step name.
 
@@ -415,7 +507,9 @@ class RunLog(BaseModel):
                 # Its odd, so we are in brach name
                 current_branch = current_step.branches[".".join(dot_path[: i + 1])]  # type: ignore
                 current_steps = current_branch.steps
-                logger.debug(
+                logger.debug(
+                    f"Finding step log for {i_name} in branch: {current_branch}"
+                )
             else:
                 # Its even, so we are in step, we start here!
                 current_step = current_steps[".".join(dot_path[: i + 1])]
@@ -428,10 +522,6 @@ class RunLog(BaseModel):
         raise exceptions.StepLogNotFoundError(self.run_id, i_name)
 
 
-# All outside modules should interact with dataclasses using the RunLogStore to promote extensibility
-# If you want to customize dataclass, extend BaseRunLogStore and implement the methods as per the specification
-
-
 class BaseRunLogStore(ABC, BaseModel):
     """
     The base class of a Run Log Store with many common methods implemented.
@@ -441,13 +531,29 @@ class BaseRunLogStore(ABC, BaseModel):
     service_type: str = "run_log_store"
 
     @abstractmethod
-    def get_summary(self) -> Dict[str, Any]:
-        ...
+    def get_summary(self) -> Dict[str, Any]: ...
 
     @property
     def _context(self):
         return context.run_context
 
+    """
+    Retrieves a Job log from the database using the config and the job_id
+
+    Args:
+        job_id (str): The job_id of the job
+
+    Returns:
+        JobLog: The JobLog object identified by the job_id
+
+    Logically the method should:
+        * Returns the job_log defined by id from the data store defined by the config
+
+    Raises:
+        NotImplementedError: This is a base class and therefore has no default implementation
+        JobLogNotFoundError: If the job log for job_id is not found in the datastore
+    """
+
     @abstractmethod
     def create_run_log(
         self,
@@ -457,7 +563,6 @@ class BaseRunLogStore(ABC, BaseModel):
         tag: str = "",
         original_run_id: str = "",
         status: str = defaults.CREATED,
-        **kwargs,
     ):
         """
         Creates a Run Log object by using the config
@@ -473,7 +578,7 @@ class BaseRunLogStore(ABC, BaseModel):
         raise NotImplementedError
 
     @abstractmethod
-    def get_run_log_by_id(self, run_id: str, full: bool = False
+    def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
         """
         Retrieves a Run log from the database using the config and the run_id
 
@@ -495,7 +600,7 @@ class BaseRunLogStore(ABC, BaseModel):
         raise NotImplementedError
 
     @abstractmethod
-    def put_run_log(self, run_log: RunLog
+    def put_run_log(self, run_log: RunLog):
         """
         Puts the Run Log in the database as defined by the config
 
@@ -523,7 +628,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log.status = status
         self.put_run_log(run_log)
 
-    def get_parameters(self, run_id: str
+    def get_parameters(self, run_id: str) -> Dict[str, Parameter]:
         """
         Get the parameters from the Run log defined by the run_id
 
@@ -542,7 +647,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log = self.get_run_log_by_id(run_id=run_id)
         return run_log.parameters
 
-    def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]
+    def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]):
         """
         Update the parameters of the Run log with the new parameters
 
@@ -563,7 +668,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log.parameters.update(parameters)
         self.put_run_log(run_log=run_log)
 
-    def get_run_config(self, run_id: str
+    def get_run_config(self, run_id: str) -> dict:
         """
         Given a run_id, return the run_config used to perform the run.
 
@@ -577,7 +682,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log = self.get_run_log_by_id(run_id=run_id)
         return run_log.run_config
 
-    def set_run_config(self, run_id: str, run_config: dict
+    def set_run_config(self, run_id: str, run_config: dict):
         """Set the run config used to run the run_id
 
         Args:
@@ -589,7 +694,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log.run_config.update(run_config)
         self.put_run_log(run_log=run_log)
 
-    def create_step_log(self, name: str, internal_name: str
+    def create_step_log(self, name: str, internal_name: str):
         """
         Create a step log by the name and internal name
 
@@ -607,7 +712,7 @@ class BaseRunLogStore(ABC, BaseModel):
         logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
         return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)
 
-    def get_step_log(self, internal_name: str, run_id: str
+    def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
         """
         Get a step log from the datastore for run_id and the internal naming of the step log
 
@@ -629,12 +734,14 @@ class BaseRunLogStore(ABC, BaseModel):
             RunLogNotFoundError: If the run log for run_id is not found in the datastore
             StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id
         """
-        logger.info(
+        logger.info(
+            f"{self.service_name} Getting the step log: {internal_name} of {run_id}"
+        )
         run_log = self.get_run_log_by_id(run_id=run_id)
         step_log, _ = run_log.search_step_by_internal_name(internal_name)
         return step_log
 
-    def add_step_log(self, step_log: StepLog, run_id: str
+    def add_step_log(self, step_log: StepLog, run_id: str):
         """
         Add the step log in the run log as identified by the run_id in the datastore
 
@@ -664,7 +771,7 @@ class BaseRunLogStore(ABC, BaseModel):
             branch.steps[step_log.internal_name] = step_log
         self.put_run_log(run_log=run_log)
 
-    def create_branch_log(self, internal_branch_name: str
+    def create_branch_log(self, internal_branch_name: str) -> BranchLog:
         """
         Creates a uncommitted branch log object by the internal name given
 
@@ -675,10 +782,14 @@ class BaseRunLogStore(ABC, BaseModel):
             BranchLog: Uncommitted and initialized with defaults BranchLog object
         """
         # Create a new BranchLog
-        logger.info(
+        logger.info(
+            f"{self.service_name} Creating a Branch Log : {internal_branch_name}"
+        )
         return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)
 
-    def get_branch_log(
+    def get_branch_log(
+        self, internal_branch_name: str, run_id: str
+    ) -> Union[BranchLog, RunLog]:
         """
         Returns the branch log by the internal branch name for the run id
 
@@ -697,7 +808,7 @@ class BaseRunLogStore(ABC, BaseModel):
         branch, _ = run_log.search_branch_by_internal_name(internal_branch_name)
         return branch
 
-    def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str
+    def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str):
         """
         The method should:
         # Get the run log
@@ -729,8 +840,7 @@ class BaseRunLogStore(ABC, BaseModel):
             step.branches[internal_branch_name] = branch_log  # type: ignore
         self.put_run_log(run_log)
 
-
-    def create_code_identity(self, **kwargs) -> CodeIdentity:
+    def create_code_identity(self) -> CodeIdentity:
         """
         Creates an uncommitted Code identity class
 
@@ -740,7 +850,7 @@ class BaseRunLogStore(ABC, BaseModel):
         logger.info(f"{self.service_name} Creating Code identity")
         return CodeIdentity()
 
-    def create_data_catalog(self, name: str
+    def create_data_catalog(self, name: str) -> DataCatalog:
         """
         Create a uncommitted data catalog object
 
@@ -753,6 +863,45 @@ class BaseRunLogStore(ABC, BaseModel):
         logger.info(f"{self.service_name} Creating Data Catalog for {name}")
         return DataCatalog(name=name)
 
+    def create_job_log(self) -> JobLog:
+        """
+        Creates a Job log and adds it to the db
+
+        Refer to BaseRunLogStore.create_job_log
+        """
+        logger.info(f"{self.service_name} Creating a Job Log and adding it to DB")
+        return JobLog(status=defaults.CREATED)
+
+    def get_job_log(self, run_id: str) -> JobLog:
+        """
+        Returns the run_log defined by id
+
+        Raises Exception if not found
+        """
+        logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
+        run_log = self.get_run_log_by_id(run_id)
+
+        try:
+            assert run_log.job
+        except AssertionError as exc:
+            raise exceptions.JobLogNotFoundError(run_id) from exc
+
+        return run_log.job
+
+    def add_job_log(self, run_id: str, job_log: JobLog):
+        """
+        Adds the job log to the run log
+
+        Args:
+            run_id (str): The run_id of the run
+            job_log (JobLog): The job log to add to the run log
+        """
+        logger.info(f"{self.service_name} Adding the job log to DB for: {run_id}")
+        run_log = self.get_run_log_by_id(run_id=run_id)
+        run_log.job = job_log
+        run_log.status = job_log.status
+        self.put_run_log(run_log=run_log)
+
 
 class BufferRunLogstore(BaseRunLogStore):
     """
@@ -775,7 +924,13 @@ class BufferRunLogstore(BaseRunLogStore):
     """
 
     service_name: str = "buffered"
-
+
+    run_log: Optional[RunLog] = Field(
+        default=None, exclude=True
+    )  # For a buffered Run Log, this is the database
+    job_log: Optional[JobLog] = Field(
+        default=None, exclude=True
+    )  # For a buffered Run Log, this is the database
 
     def get_summary(self) -> Dict[str, Any]:
         summary = {"Type": self.service_name, "Location": "Not persisted"}
@@ -790,7 +945,6 @@ class BufferRunLogstore(BaseRunLogStore):
         tag: str = "",
         original_run_id: str = "",
         status: str = defaults.CREATED,
-        **kwargs,
     ) -> RunLog:
         """
         # Creates a Run log
@@ -809,7 +963,7 @@ class BufferRunLogstore(BaseRunLogStore):
         )
         return self.run_log
 
-    def get_run_log_by_id(self, run_id: str, full: bool = False
+    def get_run_log_by_id(self, run_id: str, full: bool = False):
         """
         # Returns the run_log defined by id
         # Raises Exception if not found
@@ -821,10 +975,15 @@ class BufferRunLogstore(BaseRunLogStore):
 
         raise exceptions.RunLogNotFoundError(run_id)
 
-    def put_run_log(self, run_log: RunLog
+    def put_run_log(self, run_log: RunLog):
         """
         # Puts the run log in the db
         # Raises Exception if not found
         """
-        logger.info(
+        logger.info(
+            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
+        )
         self.run_log = run_log
+
+
+import runnable.context as context  # noqa: F401, E402
runnable/defaults.py
CHANGED
@@ -1,6 +1,11 @@
-from
-
-
+from typing import (
+    Any,
+    Dict,
+    Mapping,
+    Optional,
+    TypedDict,  # type: ignore[unused-ignore]
+    Union,
+)
 
 from rich.style import Style
 from typing_extensions import TypeAlias
@@ -12,16 +17,6 @@ LOGGER_NAME = "runnable"
 LOG_LEVEL = "WARNING"
 
 
-class EXECUTION_PLAN(Enum):
-    """
-    The possible execution plans for a runnable job.
-    """
-
-    CHAINED = "chained"  # 121 relationship between run log and the dag.
-    UNCHAINED = "unchained"  # Only captures execution of steps, no relation.
-    INTERACTIVE = "interactive"  # used for interactive sessions
-
-
 # Type definitions
 class ServiceConfig(TypedDict):
     type: str
@@ -32,8 +27,7 @@ class RunnableConfig(TypedDict, total=False):
     run_log_store: Optional[ServiceConfig]
     secrets: Optional[ServiceConfig]
     catalog: Optional[ServiceConfig]
-
-    experiment_tracker: Optional[ServiceConfig]
+    pipeline_executor: Optional[ServiceConfig]
     pickler: Optional[ServiceConfig]
 
 
@@ -46,7 +40,6 @@ RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
 
 # Interaction settings
 TRACK_PREFIX = "RUNNABLE_TRACK_"
-STEP_INDICATOR = "_STEP_"
 PARAMETER_PREFIX = "RUNNABLE_PRM_"
 MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
 VARIABLE_PREFIX = "RUNNABLE_VAR_"
@@ -67,18 +60,14 @@ TRIGGERED = "TRIGGERED"
 
 # Node and Command settings
 COMMAND_TYPE = "python"
-NODE_SPEC_FILE = "node_spec.yaml"
 COMMAND_FRIENDLY_CHARACTER = "%"
-DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
-DEFAULT_CONTAINER_DATA_PATH = "data/"
-DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
 
 # Default services
-
+DEFAULT_PIPELINE_EXECUTOR = ServiceConfig(type="local", config={})
+DEFAULT_JOB_EXECUTOR = ServiceConfig(type="local", config={})
 DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
 DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
 DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
-DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
 DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})
 
 # Map state
@@ -110,41 +99,10 @@ COMPUTE_DATA_FOLDER = "."
 # Secrets settings
 DOTENV_FILE_LOCATION = ".env"
 
-
-# Docker settings
-DOCKERFILE_NAME = "Dockerfile"
-DOCKERFILE_CONTENT = r"""# Python 3.8 Image without Dependecies
-FROM python:3.8
-
-LABEL maintainer="mesanthu@gmail.com"
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-
-${INSTALL_STYLE}
-
-ENV VIRTUAL_ENV=/opt/venv
-RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
-${COPY_CONTENT}
-WORKDIR /app
-
-${INSTALL_REQUIREMENTS}
-"""
-GIT_ARCHIVE_NAME = "git_tracked"
 LEN_SHA_FOR_TAG = 8
 
-
-
-"""
-The possible container entrypoint types.
-"""
-
-USER = "user"
-SYSTEM = "system"
-
+# JOB CONFIG
+DEFAULT_JOB_NAME = "job"
 
 ## Logging settings
 
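
A configuration dict under 0.16.0 would therefore look roughly like the sketch below: the experiment_tracker entry is gone and pipeline_executor takes its place. The values shown are just the defaults declared above, repeated for illustration.

    # Sketch only: keys and defaults taken from the RunnableConfig/defaults hunks above.
    from runnable import defaults

    config: defaults.RunnableConfig = {
        "pipeline_executor": defaults.DEFAULT_PIPELINE_EXECUTOR,  # {"type": "local", "config": {}}
        "run_log_store": defaults.DEFAULT_RUN_LOG_STORE,          # file-system
        "catalog": defaults.DEFAULT_CATALOG,                      # file-system
        "secrets": defaults.DEFAULT_SECRETS,                      # env-secrets
        "pickler": defaults.DEFAULT_PICKLER,                      # pickle
    }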