runnable 0.14.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
runnable/context.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Optional
1
+ from typing import Any, Dict, List, Optional
2
2
 
3
3
  from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
4
4
  from rich.progress import Progress
@@ -9,6 +9,7 @@ from runnable.executor import BaseExecutor
9
9
  from runnable.graph import Graph
10
10
  from runnable.pickler import BasePickler
11
11
  from runnable.secrets import BaseSecrets
12
+ from runnable.tasks import BaseTaskType
12
13
 
13
14
 
14
15
  class Context(BaseModel):
@@ -22,15 +23,23 @@ class Context(BaseModel):
22
23
  model_config = ConfigDict(arbitrary_types_allowed=True)
23
24
 
24
25
  pipeline_file: Optional[str] = ""
26
+ job_definition_file: Optional[str] = ""
25
27
  parameters_file: Optional[str] = ""
26
28
  configuration_file: Optional[str] = ""
29
+ from_sdk: bool = False
27
30
 
28
- tag: str = ""
29
31
  run_id: str = ""
32
+ object_serialisation: bool = True
33
+ return_objects: Dict[str, Any] = {}
34
+
35
+ tag: str = ""
30
36
  variables: Dict[str, str] = {}
37
+
31
38
  dag: Optional[Graph] = None
32
39
  dag_hash: str = ""
33
- execution_plan: str = ""
40
+
41
+ job: Optional[BaseTaskType] = None
42
+ job_catalog_settings: Optional[List[str]] = []
34
43
 
35
44
 
36
45
  run_context = None # type: Context # type: ignore
runnable/datastore.py CHANGED
@@ -24,7 +24,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
24
24
 
25
25
 
26
26
  JSONType = Union[
27
- str, int, float, bool, List[Any], Dict[str, Any]
27
+ Union[None, bool, str, float, int, List[Any], Dict[str, Any]]
28
28
  ] # This is actually JSONType, but pydantic doesn't support TypeAlias yet
29
29
 
30
30
 
@@ -98,22 +98,33 @@ class ObjectParameter(BaseModel):
98
98
  @computed_field # type: ignore
99
99
  @property
100
100
  def description(self) -> str:
101
- return f"Pickled object stored in catalog as: {self.value}"
101
+ if context.run_context.object_serialisation:
102
+ return f"Pickled object stored in catalog as: {self.value}"
103
+
104
+ return f"Object stored in memory as: {self.value}"
102
105
 
103
106
  @property
104
107
  def file_name(self) -> str:
105
108
  return f"{self.value}{context.run_context.pickler.extension}"
106
109
 
107
110
  def get_value(self) -> Any:
108
- # Get the pickled object
109
- catalog_handler = context.run_context.catalog_handler
111
+ # If there was no serialisation, return the object from the return objects
112
+ if not context.run_context.object_serialisation:
113
+ return context.run_context.return_objects[self.value]
110
114
 
115
+ # If the object was serialised, get it from the catalog
116
+ catalog_handler = context.run_context.catalog_handler
111
117
  catalog_handler.get(name=self.file_name, run_id=context.run_context.run_id)
112
118
  obj = context.run_context.pickler.load(path=self.file_name)
113
119
  os.remove(self.file_name) # Remove after loading
114
120
  return obj
115
121
 
116
122
  def put_object(self, data: Any) -> None:
123
+ if not context.run_context.object_serialisation:
124
+ context.run_context.return_objects[self.value] = data
125
+ return
126
+
127
+ # If the object was serialised, put it in the catalog
117
128
  context.run_context.pickler.dump(data=data, path=self.file_name)
118
129
 
119
130
  catalog_handler = context.run_context.catalog_handler
@@ -300,6 +311,73 @@ class BranchLog(BaseModel):
300
311
  StepLog.model_rebuild()
301
312
 
302
313
 
314
+ class JobLog(BaseModel):
315
+ """
316
+ The data class capturing the data of a job
317
+ This should be treated as a step log
318
+ """
319
+
320
+ status: str = defaults.FAIL
321
+ message: str = ""
322
+ mock: bool = False
323
+ code_identities: List[CodeIdentity] = Field(default_factory=list)
324
+ attempts: List[StepAttempt] = Field(default_factory=list)
325
+ data_catalog: List[DataCatalog] = Field(default_factory=list)
326
+
327
+ def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
328
+ """
329
+ Add the data catalogs as asked by the user
330
+
331
+ Args:
332
+ data_catalogs ([DataCatalog]): A list of data catalog items
333
+ """
334
+
335
+ if not self.data_catalog:
336
+ self.data_catalog = []
337
+ for data_catalog in data_catalogs:
338
+ self.data_catalog.append(data_catalog)
339
+
340
+ def get_summary(self) -> Dict[str, Any]:
341
+ """
342
+ Summarize the job log for logging
343
+ """
344
+ summary: Dict[str, Any] = {}
345
+
346
+ summary["Available parameters"] = [
347
+ (p, v.description)
348
+ for attempt in self.attempts
349
+ for p, v in attempt.input_parameters.items()
350
+ ]
351
+
352
+ summary["Output catalog content"] = [
353
+ dc.name for dc in self.data_catalog if dc.stage == "put"
354
+ ]
355
+ summary["Output parameters"] = [
356
+ (p, v.description)
357
+ for attempt in self.attempts
358
+ for p, v in attempt.output_parameters.items()
359
+ ]
360
+
361
+ summary["Metrics"] = [
362
+ (p, v.description)
363
+ for attempt in self.attempts
364
+ for p, v in attempt.user_defined_metrics.items()
365
+ ]
366
+
367
+ cis = []
368
+ for ci in self.code_identities:
369
+ message = f"{ci.code_identifier_type}:{ci.code_identifier}"
370
+ if not ci.code_identifier_dependable:
371
+ message += " but is not dependable"
372
+ cis.append(message)
373
+
374
+ summary["Code identities"] = cis
375
+
376
+ summary["status"] = self.status
377
+
378
+ return summary
379
+
380
+
303
381
  class RunLog(BaseModel):
304
382
  """
305
383
  The data captured as part of Run Log
@@ -310,6 +388,7 @@ class RunLog(BaseModel):
310
388
  tag: Optional[str] = ""
311
389
  status: str = defaults.FAIL
312
390
  steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
391
+ job: Optional[JobLog] = None
313
392
  parameters: Dict[str, Parameter] = Field(default_factory=dict)
314
393
  run_config: Dict[str, Any] = Field(default_factory=dict)
315
394
 
@@ -469,6 +548,23 @@ class BaseRunLogStore(ABC, BaseModel):
469
548
  def _context(self):
470
549
  return context.run_context
471
550
 
551
+ """
552
+ Retrieves a Job log from the database using the config and the job_id
553
+
554
+ Args:
555
+ job_id (str): The job_id of the job
556
+
557
+ Returns:
558
+ JobLog: The JobLog object identified by the job_id
559
+
560
+ Logically the method should:
561
+ * Returns the job_log defined by id from the data store defined by the config
562
+
563
+ Raises:
564
+ NotImplementedError: This is a base class and therefore has no default implementation
565
+ JobLogNotFoundError: If the job log for job_id is not found in the datastore
566
+ """
567
+
472
568
  @abstractmethod
473
569
  def create_run_log(
474
570
  self,
@@ -478,7 +574,6 @@ class BaseRunLogStore(ABC, BaseModel):
478
574
  tag: str = "",
479
575
  original_run_id: str = "",
480
576
  status: str = defaults.CREATED,
481
- **kwargs,
482
577
  ):
483
578
  """
484
579
  Creates a Run Log object by using the config
@@ -494,7 +589,7 @@ class BaseRunLogStore(ABC, BaseModel):
494
589
  raise NotImplementedError
495
590
 
496
591
  @abstractmethod
497
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
592
+ def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
498
593
  """
499
594
  Retrieves a Run log from the database using the config and the run_id
500
595
 
@@ -516,7 +611,7 @@ class BaseRunLogStore(ABC, BaseModel):
516
611
  raise NotImplementedError
517
612
 
518
613
  @abstractmethod
519
- def put_run_log(self, run_log: RunLog, **kwargs):
614
+ def put_run_log(self, run_log: RunLog):
520
615
  """
521
616
  Puts the Run Log in the database as defined by the config
522
617
 
@@ -544,7 +639,7 @@ class BaseRunLogStore(ABC, BaseModel):
544
639
  run_log.status = status
545
640
  self.put_run_log(run_log)
546
641
 
547
- def get_parameters(self, run_id: str, **kwargs) -> Dict[str, Parameter]:
642
+ def get_parameters(self, run_id: str) -> Dict[str, Parameter]:
548
643
  """
549
644
  Get the parameters from the Run log defined by the run_id
550
645
 
@@ -563,7 +658,7 @@ class BaseRunLogStore(ABC, BaseModel):
563
658
  run_log = self.get_run_log_by_id(run_id=run_id)
564
659
  return run_log.parameters
565
660
 
566
- def set_parameters(self, run_id: str, parameters: Dict[str, Parameter], **kwargs):
661
+ def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]):
567
662
  """
568
663
  Update the parameters of the Run log with the new parameters
569
664
 
@@ -584,7 +679,7 @@ class BaseRunLogStore(ABC, BaseModel):
584
679
  run_log.parameters.update(parameters)
585
680
  self.put_run_log(run_log=run_log)
586
681
 
587
- def get_run_config(self, run_id: str, **kwargs) -> dict:
682
+ def get_run_config(self, run_id: str) -> dict:
588
683
  """
589
684
  Given a run_id, return the run_config used to perform the run.
590
685
 
@@ -598,7 +693,7 @@ class BaseRunLogStore(ABC, BaseModel):
598
693
  run_log = self.get_run_log_by_id(run_id=run_id)
599
694
  return run_log.run_config
600
695
 
601
- def set_run_config(self, run_id: str, run_config: dict, **kwargs):
696
+ def set_run_config(self, run_id: str, run_config: dict):
602
697
  """Set the run config used to run the run_id
603
698
 
604
699
  Args:
@@ -610,7 +705,7 @@ class BaseRunLogStore(ABC, BaseModel):
610
705
  run_log.run_config.update(run_config)
611
706
  self.put_run_log(run_log=run_log)
612
707
 
613
- def create_step_log(self, name: str, internal_name: str, **kwargs):
708
+ def create_step_log(self, name: str, internal_name: str):
614
709
  """
615
710
  Create a step log by the name and internal name
616
711
 
@@ -628,7 +723,7 @@ class BaseRunLogStore(ABC, BaseModel):
628
723
  logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
629
724
  return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)
630
725
 
631
- def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
726
+ def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
632
727
  """
633
728
  Get a step log from the datastore for run_id and the internal naming of the step log
634
729
 
@@ -657,7 +752,7 @@ class BaseRunLogStore(ABC, BaseModel):
657
752
  step_log, _ = run_log.search_step_by_internal_name(internal_name)
658
753
  return step_log
659
754
 
660
- def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
755
+ def add_step_log(self, step_log: StepLog, run_id: str):
661
756
  """
662
757
  Add the step log in the run log as identified by the run_id in the datastore
663
758
 
@@ -687,7 +782,7 @@ class BaseRunLogStore(ABC, BaseModel):
687
782
  branch.steps[step_log.internal_name] = step_log
688
783
  self.put_run_log(run_log=run_log)
689
784
 
690
- def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
785
+ def create_branch_log(self, internal_branch_name: str) -> BranchLog:
691
786
  """
692
787
  Creates a uncommitted branch log object by the internal name given
693
788
 
@@ -704,7 +799,7 @@ class BaseRunLogStore(ABC, BaseModel):
704
799
  return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)
705
800
 
706
801
  def get_branch_log(
707
- self, internal_branch_name: str, run_id: str, **kwargs
802
+ self, internal_branch_name: str, run_id: str
708
803
  ) -> Union[BranchLog, RunLog]:
709
804
  """
710
805
  Returns the branch log by the internal branch name for the run id
@@ -724,9 +819,7 @@ class BaseRunLogStore(ABC, BaseModel):
724
819
  branch, _ = run_log.search_branch_by_internal_name(internal_branch_name)
725
820
  return branch
726
821
 
727
- def add_branch_log(
728
- self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs
729
- ):
822
+ def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str):
730
823
  """
731
824
  The method should:
732
825
  # Get the run log
@@ -758,8 +851,7 @@ class BaseRunLogStore(ABC, BaseModel):
758
851
  step.branches[internal_branch_name] = branch_log # type: ignore
759
852
  self.put_run_log(run_log)
760
853
 
761
- #
762
- def create_code_identity(self, **kwargs) -> CodeIdentity:
854
+ def create_code_identity(self) -> CodeIdentity:
763
855
  """
764
856
  Creates an uncommitted Code identity class
765
857
 
@@ -769,7 +861,7 @@ class BaseRunLogStore(ABC, BaseModel):
769
861
  logger.info(f"{self.service_name} Creating Code identity")
770
862
  return CodeIdentity()
771
863
 
772
- def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
864
+ def create_data_catalog(self, name: str) -> DataCatalog:
773
865
  """
774
866
  Create a uncommitted data catalog object
775
867
 
@@ -782,6 +874,45 @@ class BaseRunLogStore(ABC, BaseModel):
782
874
  logger.info(f"{self.service_name} Creating Data Catalog for {name}")
783
875
  return DataCatalog(name=name)
784
876
 
877
+ def create_job_log(self) -> JobLog:
878
+ """
879
+ Creates an uncommitted Job log object with status CREATED; nothing is written to the db here
880
+
881
+ Use add_job_log to attach the job log to a run log and persist it
882
+ """
883
+ logger.info(f"{self.service_name} Creating a Job Log and adding it to DB")
884
+ return JobLog(status=defaults.CREATED)
885
+
886
+ def get_job_log(self, run_id: str) -> JobLog:
887
+ """
888
+ Returns the job log stored on the run log identified by run_id
889
+
890
+ Raises JobLogNotFoundError if the run log has no job log
891
+ """
892
+ logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
893
+ run_log = self.get_run_log_by_id(run_id)
894
+
895
+ try:
896
+ assert run_log.job
897
+ except AssertionError as exc:
898
+ raise exceptions.JobLogNotFoundError(run_id) from exc
899
+
900
+ return run_log.job
901
+
902
+ def add_job_log(self, run_id: str, job_log: JobLog):
903
+ """
904
+ Adds the job log to the run log
905
+
906
+ Args:
907
+ run_id (str): The run_id of the run
908
+ job_log (JobLog): The job log to add to the run log
909
+ """
910
+ logger.info(f"{self.service_name} Adding the job log to DB for: {run_id}")
911
+ run_log = self.get_run_log_by_id(run_id=run_id)
912
+ run_log.job = job_log
913
+ run_log.status = job_log.status
914
+ self.put_run_log(run_log=run_log)
915
+
785
916
 
786
917
  class BufferRunLogstore(BaseRunLogStore):
787
918
  """
@@ -804,9 +935,13 @@ class BufferRunLogstore(BaseRunLogStore):
804
935
  """
805
936
 
806
937
  service_name: str = "buffered"
938
+
807
939
  run_log: Optional[RunLog] = Field(
808
940
  default=None, exclude=True
809
941
  ) # For a buffered Run Log, this is the database
942
+ job_log: Optional[JobLog] = Field(
943
+ default=None, exclude=True
944
+ ) # For a buffered Run Log, this is the database
810
945
 
811
946
  def get_summary(self) -> Dict[str, Any]:
812
947
  summary = {"Type": self.service_name, "Location": "Not persisted"}
@@ -821,7 +956,6 @@ class BufferRunLogstore(BaseRunLogStore):
821
956
  tag: str = "",
822
957
  original_run_id: str = "",
823
958
  status: str = defaults.CREATED,
824
- **kwargs,
825
959
  ) -> RunLog:
826
960
  """
827
961
  # Creates a Run log
@@ -840,7 +974,7 @@ class BufferRunLogstore(BaseRunLogStore):
840
974
  )
841
975
  return self.run_log
842
976
 
843
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
977
+ def get_run_log_by_id(self, run_id: str, full: bool = False):
844
978
  """
845
979
  # Returns the run_log defined by id
846
980
  # Raises Exception if not found
@@ -852,7 +986,7 @@ class BufferRunLogstore(BaseRunLogStore):
852
986
 
853
987
  raise exceptions.RunLogNotFoundError(run_id)
854
988
 
855
- def put_run_log(self, run_log: RunLog, **kwargs):
989
+ def put_run_log(self, run_log: RunLog):
856
990
  """
857
991
  # Puts the run log in the db
858
992
  # Raises Exception if not found
runnable/defaults.py CHANGED
@@ -1,6 +1,11 @@
1
- from enum import Enum
2
- from typing import TypedDict # type: ignore[unused-ignore]
3
- from typing import Any, Dict, Mapping, Optional, Union
1
+ from typing import (
2
+ Any,
3
+ Dict,
4
+ Mapping,
5
+ Optional,
6
+ TypedDict, # type: ignore[unused-ignore]
7
+ Union,
8
+ )
4
9
 
5
10
  from rich.style import Style
6
11
  from typing_extensions import TypeAlias
@@ -12,16 +17,6 @@ LOGGER_NAME = "runnable"
12
17
  LOG_LEVEL = "WARNING"
13
18
 
14
19
 
15
- class EXECUTION_PLAN(Enum):
16
- """
17
- The possible execution plans for a runnable job.
18
- """
19
-
20
- CHAINED = "chained" #  121 relationship between run log and the dag.
21
- UNCHAINED = "unchained" # Only captures execution of steps, no relation.
22
- INTERACTIVE = "interactive" # used for interactive sessions
23
-
24
-
25
20
  # Type definitions
26
21
  class ServiceConfig(TypedDict):
27
22
  type: str
@@ -32,7 +27,7 @@ class RunnableConfig(TypedDict, total=False):
32
27
  run_log_store: Optional[ServiceConfig]
33
28
  secrets: Optional[ServiceConfig]
34
29
  catalog: Optional[ServiceConfig]
35
- executor: Optional[ServiceConfig]
30
+ pipeline_executor: Optional[ServiceConfig]
36
31
  pickler: Optional[ServiceConfig]
37
32
 
38
33
 
@@ -45,7 +40,6 @@ RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
45
40
 
46
41
  # Interaction settings
47
42
  TRACK_PREFIX = "RUNNABLE_TRACK_"
48
- STEP_INDICATOR = "_STEP_"
49
43
  PARAMETER_PREFIX = "RUNNABLE_PRM_"
50
44
  MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
51
45
  VARIABLE_PREFIX = "RUNNABLE_VAR_"
@@ -66,18 +60,14 @@ TRIGGERED = "TRIGGERED"
66
60
 
67
61
  # Node and Command settings
68
62
  COMMAND_TYPE = "python"
69
- NODE_SPEC_FILE = "node_spec.yaml"
70
63
  COMMAND_FRIENDLY_CHARACTER = "%"
71
- DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
72
- DEFAULT_CONTAINER_DATA_PATH = "data/"
73
- DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
74
64
 
75
65
  # Default services
76
- DEFAULT_EXECUTOR = ServiceConfig(type="local", config={})
66
+ DEFAULT_PIPELINE_EXECUTOR = ServiceConfig(type="local", config={})
67
+ DEFAULT_JOB_EXECUTOR = ServiceConfig(type="local", config={})
77
68
  DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
78
69
  DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
79
70
  DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
80
- DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
81
71
  DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})
82
72
 
83
73
  # Map state
@@ -109,41 +99,10 @@ COMPUTE_DATA_FOLDER = "."
109
99
  # Secrets settings
110
100
  DOTENV_FILE_LOCATION = ".env"
111
101
 
112
-
113
- # Docker settings
114
- DOCKERFILE_NAME = "Dockerfile"
115
- DOCKERFILE_CONTENT = r"""# Python 3.8 Image without Dependecies
116
- FROM python:3.8
117
-
118
- LABEL maintainer="mesanthu@gmail.com"
119
-
120
- RUN apt-get update && apt-get install -y --no-install-recommends \
121
- git \
122
- && rm -rf /var/lib/apt/lists/*
123
-
124
- ${INSTALL_STYLE}
125
-
126
- ENV VIRTUAL_ENV=/opt/venv
127
- RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV
128
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
129
-
130
- ${COPY_CONTENT}
131
- WORKDIR /app
132
-
133
- ${INSTALL_REQUIREMENTS}
134
- """
135
- GIT_ARCHIVE_NAME = "git_tracked"
136
102
  LEN_SHA_FOR_TAG = 8
137
103
 
138
-
139
- class ENTRYPOINT(Enum):
140
- """
141
- The possible container entrypoint types.
142
- """
143
-
144
- USER = "user"
145
- SYSTEM = "system"
146
-
104
+ # JOB CONFIG
105
+ DEFAULT_JOB_NAME = "job"
147
106
 
148
107
  ## Logging settings
149
108