runnable 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
runnable/context.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Optional
1
+ from typing import Dict, List, Optional
2
2
 
3
3
  from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
4
4
  from rich.progress import Progress
@@ -9,6 +9,7 @@ from runnable.executor import BaseExecutor
9
9
  from runnable.graph import Graph
10
10
  from runnable.pickler import BasePickler
11
11
  from runnable.secrets import BaseSecrets
12
+ from runnable.tasks import BaseTaskType
12
13
 
13
14
 
14
15
  class Context(BaseModel):
@@ -22,15 +23,21 @@ class Context(BaseModel):
22
23
  model_config = ConfigDict(arbitrary_types_allowed=True)
23
24
 
24
25
  pipeline_file: Optional[str] = ""
26
+ job_definition_file: Optional[str] = ""
25
27
  parameters_file: Optional[str] = ""
26
28
  configuration_file: Optional[str] = ""
29
+ from_sdk: bool = False
27
30
 
28
- tag: str = ""
29
31
  run_id: str = ""
32
+
33
+ tag: str = ""
30
34
  variables: Dict[str, str] = {}
35
+
31
36
  dag: Optional[Graph] = None
32
37
  dag_hash: str = ""
33
- execution_plan: str = ""
38
+
39
+ job: Optional[BaseTaskType] = None
40
+ job_catalog_settings: Optional[List[str]] = []
34
41
 
35
42
 
36
43
  run_context = None # type: Context # type: ignore
runnable/datastore.py CHANGED
@@ -24,7 +24,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
24
24
 
25
25
 
26
26
  JSONType = Union[
27
- str, int, float, bool, List[Any], Dict[str, Any]
27
+ Union[None, bool, str, float, int, List[Any], Dict[str, Any]]
28
28
  ] # This is actually JSONType, but pydantic doesn't support TypeAlias yet
29
29
 
30
30
 
@@ -300,6 +300,73 @@ class BranchLog(BaseModel):
300
300
  StepLog.model_rebuild()
301
301
 
302
302
 
303
+ class JobLog(BaseModel):
304
+ """
305
+ The data class capturing the data of a job
306
+ This should be treated as a step log
307
+ """
308
+
309
+ status: str = defaults.FAIL
310
+ message: str = ""
311
+ mock: bool = False
312
+ code_identities: List[CodeIdentity] = Field(default_factory=list)
313
+ attempts: List[StepAttempt] = Field(default_factory=list)
314
+ data_catalog: List[DataCatalog] = Field(default_factory=list)
315
+
316
+ def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
317
+ """
318
+ Add the data catalogs as asked by the user
319
+
320
+ Args:
321
+ data_catalogs ([DataCatalog]): A list of data catalog items
322
+ """
323
+
324
+ if not self.data_catalog:
325
+ self.data_catalog = []
326
+ for data_catalog in data_catalogs:
327
+ self.data_catalog.append(data_catalog)
328
+
329
+ def get_summary(self) -> Dict[str, Any]:
330
+ """
331
+ Summarize the job log for logging
332
+ """
333
+ summary: Dict[str, Any] = {}
334
+
335
+ summary["Available parameters"] = [
336
+ (p, v.description)
337
+ for attempt in self.attempts
338
+ for p, v in attempt.input_parameters.items()
339
+ ]
340
+
341
+ summary["Output catalog content"] = [
342
+ dc.name for dc in self.data_catalog if dc.stage == "put"
343
+ ]
344
+ summary["Output parameters"] = [
345
+ (p, v.description)
346
+ for attempt in self.attempts
347
+ for p, v in attempt.output_parameters.items()
348
+ ]
349
+
350
+ summary["Metrics"] = [
351
+ (p, v.description)
352
+ for attempt in self.attempts
353
+ for p, v in attempt.user_defined_metrics.items()
354
+ ]
355
+
356
+ cis = []
357
+ for ci in self.code_identities:
358
+ message = f"{ci.code_identifier_type}:{ci.code_identifier}"
359
+ if not ci.code_identifier_dependable:
360
+ message += " but is not dependable"
361
+ cis.append(message)
362
+
363
+ summary["Code identities"] = cis
364
+
365
+ summary["status"] = self.status
366
+
367
+ return summary
368
+
369
+
303
370
  class RunLog(BaseModel):
304
371
  """
305
372
  The data captured as part of Run Log
@@ -310,6 +377,7 @@ class RunLog(BaseModel):
310
377
  tag: Optional[str] = ""
311
378
  status: str = defaults.FAIL
312
379
  steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
380
+ job: Optional[JobLog] = None
313
381
  parameters: Dict[str, Parameter] = Field(default_factory=dict)
314
382
  run_config: Dict[str, Any] = Field(default_factory=dict)
315
383
 
@@ -469,6 +537,23 @@ class BaseRunLogStore(ABC, BaseModel):
469
537
  def _context(self):
470
538
  return context.run_context
471
539
 
540
+ """
541
+ Retrieves a Job log from the database using the config and the job_id
542
+
543
+ Args:
544
+ job_id (str): The job_id of the job
545
+
546
+ Returns:
547
+ JobLog: The JobLog object identified by the job_id
548
+
549
+ Logically the method should:
550
+ * Return the job_log identified by job_id from the data store defined by the config
551
+
552
+ Raises:
553
+ NotImplementedError: This is a base class and therefore has no default implementation
554
+ JobLogNotFoundError: If the job log for job_id is not found in the datastore
555
+ """
556
+
472
557
  @abstractmethod
473
558
  def create_run_log(
474
559
  self,
@@ -478,7 +563,6 @@ class BaseRunLogStore(ABC, BaseModel):
478
563
  tag: str = "",
479
564
  original_run_id: str = "",
480
565
  status: str = defaults.CREATED,
481
- **kwargs,
482
566
  ):
483
567
  """
484
568
  Creates a Run Log object by using the config
@@ -494,7 +578,7 @@ class BaseRunLogStore(ABC, BaseModel):
494
578
  raise NotImplementedError
495
579
 
496
580
  @abstractmethod
497
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
581
+ def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
498
582
  """
499
583
  Retrieves a Run log from the database using the config and the run_id
500
584
 
@@ -516,7 +600,7 @@ class BaseRunLogStore(ABC, BaseModel):
516
600
  raise NotImplementedError
517
601
 
518
602
  @abstractmethod
519
- def put_run_log(self, run_log: RunLog, **kwargs):
603
+ def put_run_log(self, run_log: RunLog):
520
604
  """
521
605
  Puts the Run Log in the database as defined by the config
522
606
 
@@ -544,7 +628,7 @@ class BaseRunLogStore(ABC, BaseModel):
544
628
  run_log.status = status
545
629
  self.put_run_log(run_log)
546
630
 
547
- def get_parameters(self, run_id: str, **kwargs) -> Dict[str, Parameter]:
631
+ def get_parameters(self, run_id: str) -> Dict[str, Parameter]:
548
632
  """
549
633
  Get the parameters from the Run log defined by the run_id
550
634
 
@@ -563,7 +647,7 @@ class BaseRunLogStore(ABC, BaseModel):
563
647
  run_log = self.get_run_log_by_id(run_id=run_id)
564
648
  return run_log.parameters
565
649
 
566
- def set_parameters(self, run_id: str, parameters: Dict[str, Parameter], **kwargs):
650
+ def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]):
567
651
  """
568
652
  Update the parameters of the Run log with the new parameters
569
653
 
@@ -584,7 +668,7 @@ class BaseRunLogStore(ABC, BaseModel):
584
668
  run_log.parameters.update(parameters)
585
669
  self.put_run_log(run_log=run_log)
586
670
 
587
- def get_run_config(self, run_id: str, **kwargs) -> dict:
671
+ def get_run_config(self, run_id: str) -> dict:
588
672
  """
589
673
  Given a run_id, return the run_config used to perform the run.
590
674
 
@@ -598,7 +682,7 @@ class BaseRunLogStore(ABC, BaseModel):
598
682
  run_log = self.get_run_log_by_id(run_id=run_id)
599
683
  return run_log.run_config
600
684
 
601
- def set_run_config(self, run_id: str, run_config: dict, **kwargs):
685
+ def set_run_config(self, run_id: str, run_config: dict):
602
686
  """Set the run config used to run the run_id
603
687
 
604
688
  Args:
@@ -610,7 +694,7 @@ class BaseRunLogStore(ABC, BaseModel):
610
694
  run_log.run_config.update(run_config)
611
695
  self.put_run_log(run_log=run_log)
612
696
 
613
- def create_step_log(self, name: str, internal_name: str, **kwargs):
697
+ def create_step_log(self, name: str, internal_name: str):
614
698
  """
615
699
  Create a step log by the name and internal name
616
700
 
@@ -628,7 +712,7 @@ class BaseRunLogStore(ABC, BaseModel):
628
712
  logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
629
713
  return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)
630
714
 
631
- def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
715
+ def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
632
716
  """
633
717
  Get a step log from the datastore for run_id and the internal naming of the step log
634
718
 
@@ -657,7 +741,7 @@ class BaseRunLogStore(ABC, BaseModel):
657
741
  step_log, _ = run_log.search_step_by_internal_name(internal_name)
658
742
  return step_log
659
743
 
660
- def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
744
+ def add_step_log(self, step_log: StepLog, run_id: str):
661
745
  """
662
746
  Add the step log in the run log as identified by the run_id in the datastore
663
747
 
@@ -687,7 +771,7 @@ class BaseRunLogStore(ABC, BaseModel):
687
771
  branch.steps[step_log.internal_name] = step_log
688
772
  self.put_run_log(run_log=run_log)
689
773
 
690
- def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
774
+ def create_branch_log(self, internal_branch_name: str) -> BranchLog:
691
775
  """
692
776
  Creates a uncommitted branch log object by the internal name given
693
777
 
@@ -704,7 +788,7 @@ class BaseRunLogStore(ABC, BaseModel):
704
788
  return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)
705
789
 
706
790
  def get_branch_log(
707
- self, internal_branch_name: str, run_id: str, **kwargs
791
+ self, internal_branch_name: str, run_id: str
708
792
  ) -> Union[BranchLog, RunLog]:
709
793
  """
710
794
  Returns the branch log by the internal branch name for the run id
@@ -724,9 +808,7 @@ class BaseRunLogStore(ABC, BaseModel):
724
808
  branch, _ = run_log.search_branch_by_internal_name(internal_branch_name)
725
809
  return branch
726
810
 
727
- def add_branch_log(
728
- self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs
729
- ):
811
+ def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str):
730
812
  """
731
813
  The method should:
732
814
  # Get the run log
@@ -758,8 +840,7 @@ class BaseRunLogStore(ABC, BaseModel):
758
840
  step.branches[internal_branch_name] = branch_log # type: ignore
759
841
  self.put_run_log(run_log)
760
842
 
761
- #
762
- def create_code_identity(self, **kwargs) -> CodeIdentity:
843
+ def create_code_identity(self) -> CodeIdentity:
763
844
  """
764
845
  Creates an uncommitted Code identity class
765
846
 
@@ -769,7 +850,7 @@ class BaseRunLogStore(ABC, BaseModel):
769
850
  logger.info(f"{self.service_name} Creating Code identity")
770
851
  return CodeIdentity()
771
852
 
772
- def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
853
+ def create_data_catalog(self, name: str) -> DataCatalog:
773
854
  """
774
855
  Create a uncommitted data catalog object
775
856
 
@@ -782,6 +863,45 @@ class BaseRunLogStore(ABC, BaseModel):
782
863
  logger.info(f"{self.service_name} Creating Data Catalog for {name}")
783
864
  return DataCatalog(name=name)
784
865
 
866
+ def create_job_log(self) -> JobLog:
867
+ """
868
+ Creates a Job log and adds it to the db
869
+
870
+ Refer to BaseRunLogStore.create_job_log
871
+ """
872
+ logger.info(f"{self.service_name} Creating a Job Log and adding it to DB")
873
+ return JobLog(status=defaults.CREATED)
874
+
875
+ def get_job_log(self, run_id: str) -> JobLog:
876
+ """
877
+ Returns the job log stored in the run_log identified by run_id
878
+
879
+ Raises Exception if not found
880
+ """
881
+ logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
882
+ run_log = self.get_run_log_by_id(run_id)
883
+
884
+ try:
885
+ assert run_log.job
886
+ except AssertionError as exc:
887
+ raise exceptions.JobLogNotFoundError(run_id) from exc
888
+
889
+ return run_log.job
890
+
891
+ def add_job_log(self, run_id: str, job_log: JobLog):
892
+ """
893
+ Adds the job log to the run log
894
+
895
+ Args:
896
+ run_id (str): The run_id of the run
897
+ job_log (JobLog): The job log to add to the run log
898
+ """
899
+ logger.info(f"{self.service_name} Adding the job log to DB for: {run_id}")
900
+ run_log = self.get_run_log_by_id(run_id=run_id)
901
+ run_log.job = job_log
902
+ run_log.status = job_log.status
903
+ self.put_run_log(run_log=run_log)
904
+
785
905
 
786
906
  class BufferRunLogstore(BaseRunLogStore):
787
907
  """
@@ -804,9 +924,13 @@ class BufferRunLogstore(BaseRunLogStore):
804
924
  """
805
925
 
806
926
  service_name: str = "buffered"
927
+
807
928
  run_log: Optional[RunLog] = Field(
808
929
  default=None, exclude=True
809
930
  ) # For a buffered Run Log, this is the database
931
+ job_log: Optional[JobLog] = Field(
932
+ default=None, exclude=True
933
+ ) # For a buffered Run Log, this is the database
810
934
 
811
935
  def get_summary(self) -> Dict[str, Any]:
812
936
  summary = {"Type": self.service_name, "Location": "Not persisted"}
@@ -821,7 +945,6 @@ class BufferRunLogstore(BaseRunLogStore):
821
945
  tag: str = "",
822
946
  original_run_id: str = "",
823
947
  status: str = defaults.CREATED,
824
- **kwargs,
825
948
  ) -> RunLog:
826
949
  """
827
950
  # Creates a Run log
@@ -840,7 +963,7 @@ class BufferRunLogstore(BaseRunLogStore):
840
963
  )
841
964
  return self.run_log
842
965
 
843
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
966
+ def get_run_log_by_id(self, run_id: str, full: bool = False):
844
967
  """
845
968
  # Returns the run_log defined by id
846
969
  # Raises Exception if not found
@@ -852,7 +975,7 @@ class BufferRunLogstore(BaseRunLogStore):
852
975
 
853
976
  raise exceptions.RunLogNotFoundError(run_id)
854
977
 
855
- def put_run_log(self, run_log: RunLog, **kwargs):
978
+ def put_run_log(self, run_log: RunLog):
856
979
  """
857
980
  # Puts the run log in the db
858
981
  # Raises Exception if not found
runnable/defaults.py CHANGED
@@ -1,6 +1,11 @@
1
- from enum import Enum
2
- from typing import TypedDict # type: ignore[unused-ignore]
3
- from typing import Any, Dict, Mapping, Optional, Union
1
+ from typing import (
2
+ Any,
3
+ Dict,
4
+ Mapping,
5
+ Optional,
6
+ TypedDict, # type: ignore[unused-ignore]
7
+ Union,
8
+ )
4
9
 
5
10
  from rich.style import Style
6
11
  from typing_extensions import TypeAlias
@@ -12,16 +17,6 @@ LOGGER_NAME = "runnable"
12
17
  LOG_LEVEL = "WARNING"
13
18
 
14
19
 
15
- class EXECUTION_PLAN(Enum):
16
- """
17
- The possible execution plans for a runnable job.
18
- """
19
-
20
- CHAINED = "chained" #  121 relationship between run log and the dag.
21
- UNCHAINED = "unchained" # Only captures execution of steps, no relation.
22
- INTERACTIVE = "interactive" # used for interactive sessions
23
-
24
-
25
20
  # Type definitions
26
21
  class ServiceConfig(TypedDict):
27
22
  type: str
@@ -32,7 +27,7 @@ class RunnableConfig(TypedDict, total=False):
32
27
  run_log_store: Optional[ServiceConfig]
33
28
  secrets: Optional[ServiceConfig]
34
29
  catalog: Optional[ServiceConfig]
35
- executor: Optional[ServiceConfig]
30
+ pipeline_executor: Optional[ServiceConfig]
36
31
  pickler: Optional[ServiceConfig]
37
32
 
38
33
 
@@ -45,7 +40,6 @@ RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
45
40
 
46
41
  # Interaction settings
47
42
  TRACK_PREFIX = "RUNNABLE_TRACK_"
48
- STEP_INDICATOR = "_STEP_"
49
43
  PARAMETER_PREFIX = "RUNNABLE_PRM_"
50
44
  MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
51
45
  VARIABLE_PREFIX = "RUNNABLE_VAR_"
@@ -66,18 +60,14 @@ TRIGGERED = "TRIGGERED"
66
60
 
67
61
  # Node and Command settings
68
62
  COMMAND_TYPE = "python"
69
- NODE_SPEC_FILE = "node_spec.yaml"
70
63
  COMMAND_FRIENDLY_CHARACTER = "%"
71
- DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
72
- DEFAULT_CONTAINER_DATA_PATH = "data/"
73
- DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
74
64
 
75
65
  # Default services
76
- DEFAULT_EXECUTOR = ServiceConfig(type="local", config={})
66
+ DEFAULT_PIPELINE_EXECUTOR = ServiceConfig(type="local", config={})
67
+ DEFAULT_JOB_EXECUTOR = ServiceConfig(type="local", config={})
77
68
  DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
78
69
  DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
79
70
  DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
80
- DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
81
71
  DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})
82
72
 
83
73
  # Map state
@@ -109,41 +99,10 @@ COMPUTE_DATA_FOLDER = "."
109
99
  # Secrets settings
110
100
  DOTENV_FILE_LOCATION = ".env"
111
101
 
112
-
113
- # Docker settings
114
- DOCKERFILE_NAME = "Dockerfile"
115
- DOCKERFILE_CONTENT = r"""# Python 3.8 Image without Dependecies
116
- FROM python:3.8
117
-
118
- LABEL maintainer="mesanthu@gmail.com"
119
-
120
- RUN apt-get update && apt-get install -y --no-install-recommends \
121
- git \
122
- && rm -rf /var/lib/apt/lists/*
123
-
124
- ${INSTALL_STYLE}
125
-
126
- ENV VIRTUAL_ENV=/opt/venv
127
- RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV
128
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
129
-
130
- ${COPY_CONTENT}
131
- WORKDIR /app
132
-
133
- ${INSTALL_REQUIREMENTS}
134
- """
135
- GIT_ARCHIVE_NAME = "git_tracked"
136
102
  LEN_SHA_FOR_TAG = 8
137
103
 
138
-
139
- class ENTRYPOINT(Enum):
140
- """
141
- The possible container entrypoint types.
142
- """
143
-
144
- USER = "user"
145
- SYSTEM = "system"
146
-
104
+ # JOB CONFIG
105
+ DEFAULT_JOB_NAME = "job"
147
106
 
148
107
  ## Logging settings
149
108