runnable 0.14.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
runnable/context.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Optional
1
+ from typing import Any, Dict, List, Optional
2
2
 
3
3
  from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
4
4
  from rich.progress import Progress
@@ -9,6 +9,7 @@ from runnable.executor import BaseExecutor
9
9
  from runnable.graph import Graph
10
10
  from runnable.pickler import BasePickler
11
11
  from runnable.secrets import BaseSecrets
12
+ from runnable.tasks import BaseTaskType
12
13
 
13
14
 
14
15
  class Context(BaseModel):
@@ -22,15 +23,23 @@ class Context(BaseModel):
22
23
  model_config = ConfigDict(arbitrary_types_allowed=True)
23
24
 
24
25
  pipeline_file: Optional[str] = ""
26
+ job_definition_file: Optional[str] = ""
25
27
  parameters_file: Optional[str] = ""
26
28
  configuration_file: Optional[str] = ""
29
+ from_sdk: bool = False
27
30
 
28
- tag: str = ""
29
31
  run_id: str = ""
32
+ object_serialisation: bool = True
33
+ return_objects: Dict[str, Any] = {}
34
+
35
+ tag: str = ""
30
36
  variables: Dict[str, str] = {}
37
+
31
38
  dag: Optional[Graph] = None
32
39
  dag_hash: str = ""
33
- execution_plan: str = ""
40
+
41
+ job: Optional[BaseTaskType] = None
42
+ job_catalog_settings: Optional[List[str]] = []
34
43
 
35
44
 
36
45
  run_context = None # type: Context # type: ignore
runnable/datastore.py CHANGED
@@ -24,7 +24,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
24
24
 
25
25
 
26
26
  JSONType = Union[
27
- str, int, float, bool, List[Any], Dict[str, Any]
27
+ Union[None, bool, str, float, int, List[Any], Dict[str, Any]]
28
28
  ] # This is actually JSONType, but pydantic doesn't support TypeAlias yet
29
29
 
30
30
 
@@ -98,22 +98,33 @@ class ObjectParameter(BaseModel):
98
98
  @computed_field # type: ignore
99
99
  @property
100
100
  def description(self) -> str:
101
- return f"Pickled object stored in catalog as: {self.value}"
101
+ if context.run_context.object_serialisation:
102
+ return f"Pickled object stored in catalog as: {self.value}"
103
+
104
+ return f"Object stored in memory as: {self.value}"
102
105
 
103
106
  @property
104
107
  def file_name(self) -> str:
105
108
  return f"{self.value}{context.run_context.pickler.extension}"
106
109
 
107
110
  def get_value(self) -> Any:
108
- # Get the pickled object
109
- catalog_handler = context.run_context.catalog_handler
111
+ # If there was no serialisation, return the object from the return objects
112
+ if not context.run_context.object_serialisation:
113
+ return context.run_context.return_objects[self.value]
110
114
 
115
+ # If the object was serialised, get it from the catalog
116
+ catalog_handler = context.run_context.catalog_handler
111
117
  catalog_handler.get(name=self.file_name, run_id=context.run_context.run_id)
112
118
  obj = context.run_context.pickler.load(path=self.file_name)
113
119
  os.remove(self.file_name) # Remove after loading
114
120
  return obj
115
121
 
116
122
  def put_object(self, data: Any) -> None:
123
+ if not context.run_context.object_serialisation:
124
+ context.run_context.return_objects[self.value] = data
125
+ return
126
+
127
+ # If the object was serialised, put it in the catalog
117
128
  context.run_context.pickler.dump(data=data, path=self.file_name)
118
129
 
119
130
  catalog_handler = context.run_context.catalog_handler
@@ -300,6 +311,73 @@ class BranchLog(BaseModel):
300
311
  StepLog.model_rebuild()
301
312
 
302
313
 
314
+ class JobLog(BaseModel):
315
+ """
316
+ The data class capturing the data of a job
317
+ This should be treated as a step log
318
+ """
319
+
320
+ status: str = defaults.FAIL
321
+ message: str = ""
322
+ mock: bool = False
323
+ code_identities: List[CodeIdentity] = Field(default_factory=list)
324
+ attempts: List[StepAttempt] = Field(default_factory=list)
325
+ data_catalog: List[DataCatalog] = Field(default_factory=list)
326
+
327
+ def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
328
+ """
329
+ Add the data catalogs as asked by the user
330
+
331
+ Args:
332
+ data_catalogs ([DataCatalog]): A list of data catalog items
333
+ """
334
+
335
+ if not self.data_catalog:
336
+ self.data_catalog = []
337
+ for data_catalog in data_catalogs:
338
+ self.data_catalog.append(data_catalog)
339
+
340
+ def get_summary(self) -> Dict[str, Any]:
341
+ """
342
+ Summarize the job log for logging
343
+ """
344
+ summary: Dict[str, Any] = {}
345
+
346
+ summary["Available parameters"] = [
347
+ (p, v.description)
348
+ for attempt in self.attempts
349
+ for p, v in attempt.input_parameters.items()
350
+ ]
351
+
352
+ summary["Output catalog content"] = [
353
+ dc.name for dc in self.data_catalog if dc.stage == "put"
354
+ ]
355
+ summary["Output parameters"] = [
356
+ (p, v.description)
357
+ for attempt in self.attempts
358
+ for p, v in attempt.output_parameters.items()
359
+ ]
360
+
361
+ summary["Metrics"] = [
362
+ (p, v.description)
363
+ for attempt in self.attempts
364
+ for p, v in attempt.user_defined_metrics.items()
365
+ ]
366
+
367
+ cis = []
368
+ for ci in self.code_identities:
369
+ message = f"{ci.code_identifier_type}:{ci.code_identifier}"
370
+ if not ci.code_identifier_dependable:
371
+ message += " but is not dependable"
372
+ cis.append(message)
373
+
374
+ summary["Code identities"] = cis
375
+
376
+ summary["status"] = self.status
377
+
378
+ return summary
379
+
380
+
303
381
  class RunLog(BaseModel):
304
382
  """
305
383
  The data captured as part of Run Log
@@ -310,6 +388,7 @@ class RunLog(BaseModel):
310
388
  tag: Optional[str] = ""
311
389
  status: str = defaults.FAIL
312
390
  steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
391
+ job: Optional[JobLog] = None
313
392
  parameters: Dict[str, Parameter] = Field(default_factory=dict)
314
393
  run_config: Dict[str, Any] = Field(default_factory=dict)
315
394
 
@@ -469,6 +548,23 @@ class BaseRunLogStore(ABC, BaseModel):
469
548
  def _context(self):
470
549
  return context.run_context
471
550
 
551
+ """
552
+ Retrieves a Job log from the database using the config and the job_id
553
+
554
+ Args:
555
+ job_id (str): The job_id of the job
556
+
557
+ Returns:
558
+ JobLog: The JobLog object identified by the job_id
559
+
560
+ Logically the method should:
561
+ * Returns the job_log defined by id from the data store defined by the config
562
+
563
+ Raises:
564
+ NotImplementedError: This is a base class and therefore has no default implementation
565
+ JobLogNotFoundError: If the job log for job_id is not found in the datastore
566
+ """
567
+
472
568
  @abstractmethod
473
569
  def create_run_log(
474
570
  self,
@@ -478,7 +574,6 @@ class BaseRunLogStore(ABC, BaseModel):
478
574
  tag: str = "",
479
575
  original_run_id: str = "",
480
576
  status: str = defaults.CREATED,
481
- **kwargs,
482
577
  ):
483
578
  """
484
579
  Creates a Run Log object by using the config
@@ -494,7 +589,7 @@ class BaseRunLogStore(ABC, BaseModel):
494
589
  raise NotImplementedError
495
590
 
496
591
  @abstractmethod
497
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
592
+ def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
498
593
  """
499
594
  Retrieves a Run log from the database using the config and the run_id
500
595
 
@@ -516,7 +611,7 @@ class BaseRunLogStore(ABC, BaseModel):
516
611
  raise NotImplementedError
517
612
 
518
613
  @abstractmethod
519
- def put_run_log(self, run_log: RunLog, **kwargs):
614
+ def put_run_log(self, run_log: RunLog):
520
615
  """
521
616
  Puts the Run Log in the database as defined by the config
522
617
 
@@ -544,7 +639,7 @@ class BaseRunLogStore(ABC, BaseModel):
544
639
  run_log.status = status
545
640
  self.put_run_log(run_log)
546
641
 
547
- def get_parameters(self, run_id: str, **kwargs) -> Dict[str, Parameter]:
642
+ def get_parameters(self, run_id: str) -> Dict[str, Parameter]:
548
643
  """
549
644
  Get the parameters from the Run log defined by the run_id
550
645
 
@@ -563,7 +658,7 @@ class BaseRunLogStore(ABC, BaseModel):
563
658
  run_log = self.get_run_log_by_id(run_id=run_id)
564
659
  return run_log.parameters
565
660
 
566
- def set_parameters(self, run_id: str, parameters: Dict[str, Parameter], **kwargs):
661
+ def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]):
567
662
  """
568
663
  Update the parameters of the Run log with the new parameters
569
664
 
@@ -584,7 +679,7 @@ class BaseRunLogStore(ABC, BaseModel):
584
679
  run_log.parameters.update(parameters)
585
680
  self.put_run_log(run_log=run_log)
586
681
 
587
- def get_run_config(self, run_id: str, **kwargs) -> dict:
682
+ def get_run_config(self, run_id: str) -> dict:
588
683
  """
589
684
  Given a run_id, return the run_config used to perform the run.
590
685
 
@@ -598,7 +693,7 @@ class BaseRunLogStore(ABC, BaseModel):
598
693
  run_log = self.get_run_log_by_id(run_id=run_id)
599
694
  return run_log.run_config
600
695
 
601
- def set_run_config(self, run_id: str, run_config: dict, **kwargs):
696
+ def set_run_config(self, run_id: str, run_config: dict):
602
697
  """Set the run config used to run the run_id
603
698
 
604
699
  Args:
@@ -610,7 +705,7 @@ class BaseRunLogStore(ABC, BaseModel):
610
705
  run_log.run_config.update(run_config)
611
706
  self.put_run_log(run_log=run_log)
612
707
 
613
- def create_step_log(self, name: str, internal_name: str, **kwargs):
708
+ def create_step_log(self, name: str, internal_name: str):
614
709
  """
615
710
  Create a step log by the name and internal name
616
711
 
@@ -628,7 +723,7 @@ class BaseRunLogStore(ABC, BaseModel):
628
723
  logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
629
724
  return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)
630
725
 
631
- def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
726
+ def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
632
727
  """
633
728
  Get a step log from the datastore for run_id and the internal naming of the step log
634
729
 
@@ -657,7 +752,7 @@ class BaseRunLogStore(ABC, BaseModel):
657
752
  step_log, _ = run_log.search_step_by_internal_name(internal_name)
658
753
  return step_log
659
754
 
660
- def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
755
+ def add_step_log(self, step_log: StepLog, run_id: str):
661
756
  """
662
757
  Add the step log in the run log as identified by the run_id in the datastore
663
758
 
@@ -687,7 +782,7 @@ class BaseRunLogStore(ABC, BaseModel):
687
782
  branch.steps[step_log.internal_name] = step_log
688
783
  self.put_run_log(run_log=run_log)
689
784
 
690
- def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
785
+ def create_branch_log(self, internal_branch_name: str) -> BranchLog:
691
786
  """
692
787
  Creates an uncommitted branch log object by the internal name given
693
788
 
@@ -704,7 +799,7 @@ class BaseRunLogStore(ABC, BaseModel):
704
799
  return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)
705
800
 
706
801
  def get_branch_log(
707
- self, internal_branch_name: str, run_id: str, **kwargs
802
+ self, internal_branch_name: str, run_id: str
708
803
  ) -> Union[BranchLog, RunLog]:
709
804
  """
710
805
  Returns the branch log by the internal branch name for the run id
@@ -724,9 +819,7 @@ class BaseRunLogStore(ABC, BaseModel):
724
819
  branch, _ = run_log.search_branch_by_internal_name(internal_branch_name)
725
820
  return branch
726
821
 
727
- def add_branch_log(
728
- self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs
729
- ):
822
+ def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str):
730
823
  """
731
824
  The method should:
732
825
  # Get the run log
@@ -758,8 +851,7 @@ class BaseRunLogStore(ABC, BaseModel):
758
851
  step.branches[internal_branch_name] = branch_log # type: ignore
759
852
  self.put_run_log(run_log)
760
853
 
761
- #
762
- def create_code_identity(self, **kwargs) -> CodeIdentity:
854
+ def create_code_identity(self) -> CodeIdentity:
763
855
  """
764
856
  Creates an uncommitted Code identity class
765
857
 
@@ -769,7 +861,7 @@ class BaseRunLogStore(ABC, BaseModel):
769
861
  logger.info(f"{self.service_name} Creating Code identity")
770
862
  return CodeIdentity()
771
863
 
772
- def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
864
+ def create_data_catalog(self, name: str) -> DataCatalog:
773
865
  """
774
866
  Create an uncommitted data catalog object
775
867
 
@@ -782,6 +874,45 @@ class BaseRunLogStore(ABC, BaseModel):
782
874
  logger.info(f"{self.service_name} Creating Data Catalog for {name}")
783
875
  return DataCatalog(name=name)
784
876
 
877
+ def create_job_log(self) -> JobLog:
878
+ """
879
+ Creates an uncommitted Job log object with status CREATED
880
+
881
+ Refer to BaseRunLogStore.create_job_log
882
+ """
883
+ logger.info(f"{self.service_name} Creating a Job Log and adding it to DB")
884
+ return JobLog(status=defaults.CREATED)
885
+
886
+ def get_job_log(self, run_id: str) -> JobLog:
887
+ """
888
+ Returns the job log of the run log identified by run_id
889
+
890
+ Raises JobLogNotFoundError if the run log has no job log
891
+ """
892
+ logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
893
+ run_log = self.get_run_log_by_id(run_id)
894
+
895
+ try:
896
+ assert run_log.job
897
+ except AssertionError as exc:
898
+ raise exceptions.JobLogNotFoundError(run_id) from exc
899
+
900
+ return run_log.job
901
+
902
+ def add_job_log(self, run_id: str, job_log: JobLog):
903
+ """
904
+ Adds the job log to the run log
905
+
906
+ Args:
907
+ run_id (str): The run_id of the run
908
+ job_log (JobLog): The job log to add to the run log
909
+ """
910
+ logger.info(f"{self.service_name} Adding the job log to DB for: {run_id}")
911
+ run_log = self.get_run_log_by_id(run_id=run_id)
912
+ run_log.job = job_log
913
+ run_log.status = job_log.status
914
+ self.put_run_log(run_log=run_log)
915
+
785
916
 
786
917
  class BufferRunLogstore(BaseRunLogStore):
787
918
  """
@@ -804,9 +935,13 @@ class BufferRunLogstore(BaseRunLogStore):
804
935
  """
805
936
 
806
937
  service_name: str = "buffered"
938
+
807
939
  run_log: Optional[RunLog] = Field(
808
940
  default=None, exclude=True
809
941
  ) # For a buffered Run Log, this is the database
942
+ job_log: Optional[JobLog] = Field(
943
+ default=None, exclude=True
944
+ ) # For a buffered Run Log, this is the database
810
945
 
811
946
  def get_summary(self) -> Dict[str, Any]:
812
947
  summary = {"Type": self.service_name, "Location": "Not persisted"}
@@ -821,7 +956,6 @@ class BufferRunLogstore(BaseRunLogStore):
821
956
  tag: str = "",
822
957
  original_run_id: str = "",
823
958
  status: str = defaults.CREATED,
824
- **kwargs,
825
959
  ) -> RunLog:
826
960
  """
827
961
  # Creates a Run log
@@ -840,7 +974,7 @@ class BufferRunLogstore(BaseRunLogStore):
840
974
  )
841
975
  return self.run_log
842
976
 
843
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
977
+ def get_run_log_by_id(self, run_id: str, full: bool = False):
844
978
  """
845
979
  # Returns the run_log defined by id
846
980
  # Raises Exception if not found
@@ -852,7 +986,7 @@ class BufferRunLogstore(BaseRunLogStore):
852
986
 
853
987
  raise exceptions.RunLogNotFoundError(run_id)
854
988
 
855
- def put_run_log(self, run_log: RunLog, **kwargs):
989
+ def put_run_log(self, run_log: RunLog):
856
990
  """
857
991
  # Puts the run log in the db
858
992
  # Raises Exception if not found
runnable/defaults.py CHANGED
@@ -1,6 +1,11 @@
1
- from enum import Enum
2
- from typing import TypedDict # type: ignore[unused-ignore]
3
- from typing import Any, Dict, Mapping, Optional, Union
1
+ from typing import (
2
+ Any,
3
+ Dict,
4
+ Mapping,
5
+ Optional,
6
+ TypedDict, # type: ignore[unused-ignore]
7
+ Union,
8
+ )
4
9
 
5
10
  from rich.style import Style
6
11
  from typing_extensions import TypeAlias
@@ -12,16 +17,6 @@ LOGGER_NAME = "runnable"
12
17
  LOG_LEVEL = "WARNING"
13
18
 
14
19
 
15
- class EXECUTION_PLAN(Enum):
16
- """
17
- The possible execution plans for a runnable job.
18
- """
19
-
20
- CHAINED = "chained" #  121 relationship between run log and the dag.
21
- UNCHAINED = "unchained" # Only captures execution of steps, no relation.
22
- INTERACTIVE = "interactive" # used for interactive sessions
23
-
24
-
25
20
  # Type definitions
26
21
  class ServiceConfig(TypedDict):
27
22
  type: str
@@ -32,7 +27,7 @@ class RunnableConfig(TypedDict, total=False):
32
27
  run_log_store: Optional[ServiceConfig]
33
28
  secrets: Optional[ServiceConfig]
34
29
  catalog: Optional[ServiceConfig]
35
- executor: Optional[ServiceConfig]
30
+ pipeline_executor: Optional[ServiceConfig]
36
31
  pickler: Optional[ServiceConfig]
37
32
 
38
33
 
@@ -45,7 +40,6 @@ RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
45
40
 
46
41
  # Interaction settings
47
42
  TRACK_PREFIX = "RUNNABLE_TRACK_"
48
- STEP_INDICATOR = "_STEP_"
49
43
  PARAMETER_PREFIX = "RUNNABLE_PRM_"
50
44
  MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
51
45
  VARIABLE_PREFIX = "RUNNABLE_VAR_"
@@ -66,18 +60,14 @@ TRIGGERED = "TRIGGERED"
66
60
 
67
61
  # Node and Command settings
68
62
  COMMAND_TYPE = "python"
69
- NODE_SPEC_FILE = "node_spec.yaml"
70
63
  COMMAND_FRIENDLY_CHARACTER = "%"
71
- DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
72
- DEFAULT_CONTAINER_DATA_PATH = "data/"
73
- DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
74
64
 
75
65
  # Default services
76
- DEFAULT_EXECUTOR = ServiceConfig(type="local", config={})
66
+ DEFAULT_PIPELINE_EXECUTOR = ServiceConfig(type="local", config={})
67
+ DEFAULT_JOB_EXECUTOR = ServiceConfig(type="local", config={})
77
68
  DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
78
69
  DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
79
70
  DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
80
- DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
81
71
  DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})
82
72
 
83
73
  # Map state
@@ -109,41 +99,10 @@ COMPUTE_DATA_FOLDER = "."
109
99
  # Secrets settings
110
100
  DOTENV_FILE_LOCATION = ".env"
111
101
 
112
-
113
- # Docker settings
114
- DOCKERFILE_NAME = "Dockerfile"
115
- DOCKERFILE_CONTENT = r"""# Python 3.8 Image without Dependecies
116
- FROM python:3.8
117
-
118
- LABEL maintainer="mesanthu@gmail.com"
119
-
120
- RUN apt-get update && apt-get install -y --no-install-recommends \
121
- git \
122
- && rm -rf /var/lib/apt/lists/*
123
-
124
- ${INSTALL_STYLE}
125
-
126
- ENV VIRTUAL_ENV=/opt/venv
127
- RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV
128
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
129
-
130
- ${COPY_CONTENT}
131
- WORKDIR /app
132
-
133
- ${INSTALL_REQUIREMENTS}
134
- """
135
- GIT_ARCHIVE_NAME = "git_tracked"
136
102
  LEN_SHA_FOR_TAG = 8
137
103
 
138
-
139
- class ENTRYPOINT(Enum):
140
- """
141
- The possible container entrypoint types.
142
- """
143
-
144
- USER = "user"
145
- SYSTEM = "system"
146
-
104
+ # JOB CONFIG
105
+ DEFAULT_JOB_NAME = "job"
147
106
 
148
107
  ## Logging settings
149
108