runnable 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
runnable/context.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Optional
1
+ from typing import Dict, List, Optional
2
2
 
3
3
  from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
4
4
  from rich.progress import Progress
@@ -9,6 +9,7 @@ from runnable.executor import BaseExecutor
9
9
  from runnable.graph import Graph
10
10
  from runnable.pickler import BasePickler
11
11
  from runnable.secrets import BaseSecrets
12
+ from runnable.tasks import BaseTaskType
12
13
 
13
14
 
14
15
  class Context(BaseModel):
@@ -22,15 +23,21 @@ class Context(BaseModel):
22
23
  model_config = ConfigDict(arbitrary_types_allowed=True)
23
24
 
24
25
  pipeline_file: Optional[str] = ""
26
+ job_definition_file: Optional[str] = ""
25
27
  parameters_file: Optional[str] = ""
26
28
  configuration_file: Optional[str] = ""
29
+ from_sdk: bool = False
27
30
 
28
- tag: str = ""
29
31
  run_id: str = ""
32
+
33
+ tag: str = ""
30
34
  variables: Dict[str, str] = {}
35
+
31
36
  dag: Optional[Graph] = None
32
37
  dag_hash: str = ""
33
- execution_plan: str = ""
38
+
39
+ job: Optional[BaseTaskType] = None
40
+ job_catalog_settings: Optional[List[str]] = []
34
41
 
35
42
 
36
43
  run_context = None # type: Context # type: ignore
runnable/datastore.py CHANGED
@@ -24,7 +24,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
24
24
 
25
25
 
26
26
  JSONType = Union[
27
- str, int, float, bool, List[Any], Dict[str, Any]
27
+ Union[None, bool, str, float, int, List[Any], Dict[str, Any]]
28
28
  ] # This is actually JSONType, but pydantic doesn't support TypeAlias yet
29
29
 
30
30
 
@@ -300,6 +300,73 @@ class BranchLog(BaseModel):
300
300
  StepLog.model_rebuild()
301
301
 
302
302
 
303
+ class JobLog(BaseModel):
304
+ """
305
+ The data class capturing the data of a job
306
+ This should be treated as a step log
307
+ """
308
+
309
+ status: str = defaults.FAIL
310
+ message: str = ""
311
+ mock: bool = False
312
+ code_identities: List[CodeIdentity] = Field(default_factory=list)
313
+ attempts: List[StepAttempt] = Field(default_factory=list)
314
+ data_catalog: List[DataCatalog] = Field(default_factory=list)
315
+
316
+ def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
317
+ """
318
+ Add the data catalogs as asked by the user
319
+
320
+ Args:
321
+ data_catalogs (List[DataCatalog]): A list of data catalog items
322
+ """
323
+
324
+ if not self.data_catalog:
325
+ self.data_catalog = []
326
+ for data_catalog in data_catalogs:
327
+ self.data_catalog.append(data_catalog)
328
+
329
+ def get_summary(self) -> Dict[str, Any]:
330
+ """
331
+ Summarize the job log for logging
332
+ """
333
+ summary: Dict[str, Any] = {}
334
+
335
+ summary["Available parameters"] = [
336
+ (p, v.description)
337
+ for attempt in self.attempts
338
+ for p, v in attempt.input_parameters.items()
339
+ ]
340
+
341
+ summary["Output catalog content"] = [
342
+ dc.name for dc in self.data_catalog if dc.stage == "put"
343
+ ]
344
+ summary["Output parameters"] = [
345
+ (p, v.description)
346
+ for attempt in self.attempts
347
+ for p, v in attempt.output_parameters.items()
348
+ ]
349
+
350
+ summary["Metrics"] = [
351
+ (p, v.description)
352
+ for attempt in self.attempts
353
+ for p, v in attempt.user_defined_metrics.items()
354
+ ]
355
+
356
+ cis = []
357
+ for ci in self.code_identities:
358
+ message = f"{ci.code_identifier_type}:{ci.code_identifier}"
359
+ if not ci.code_identifier_dependable:
360
+ message += " but is not dependable"
361
+ cis.append(message)
362
+
363
+ summary["Code identities"] = cis
364
+
365
+ summary["status"] = self.status
366
+
367
+ return summary
368
+
369
+
303
370
  class RunLog(BaseModel):
304
371
  """
305
372
  The data captured as part of Run Log
@@ -310,6 +377,7 @@ class RunLog(BaseModel):
310
377
  tag: Optional[str] = ""
311
378
  status: str = defaults.FAIL
312
379
  steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
380
+ job: Optional[JobLog] = None
313
381
  parameters: Dict[str, Parameter] = Field(default_factory=dict)
314
382
  run_config: Dict[str, Any] = Field(default_factory=dict)
315
383
 
@@ -469,6 +537,23 @@ class BaseRunLogStore(ABC, BaseModel):
469
537
  def _context(self):
470
538
  return context.run_context
471
539
 
540
+ """
541
+ Retrieves a Job log from the database using the config and the job_id
542
+
543
+ Args:
544
+ job_id (str): The job_id of the job
545
+
546
+ Returns:
547
+ JobLog: The JobLog object identified by the job_id
548
+
549
+ Logically the method should:
550
+ * Returns the job_log defined by id from the data store defined by the config
551
+
552
+ Raises:
553
+ NotImplementedError: This is a base class and therefore has no default implementation
554
+ JobLogNotFoundError: If the job log for job_id is not found in the datastore
555
+ """
556
+
472
557
  @abstractmethod
473
558
  def create_run_log(
474
559
  self,
@@ -478,7 +563,6 @@ class BaseRunLogStore(ABC, BaseModel):
478
563
  tag: str = "",
479
564
  original_run_id: str = "",
480
565
  status: str = defaults.CREATED,
481
- **kwargs,
482
566
  ):
483
567
  """
484
568
  Creates a Run Log object by using the config
@@ -494,7 +578,7 @@ class BaseRunLogStore(ABC, BaseModel):
494
578
  raise NotImplementedError
495
579
 
496
580
  @abstractmethod
497
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
581
+ def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
498
582
  """
499
583
  Retrieves a Run log from the database using the config and the run_id
500
584
 
@@ -516,7 +600,7 @@ class BaseRunLogStore(ABC, BaseModel):
516
600
  raise NotImplementedError
517
601
 
518
602
  @abstractmethod
519
- def put_run_log(self, run_log: RunLog, **kwargs):
603
+ def put_run_log(self, run_log: RunLog):
520
604
  """
521
605
  Puts the Run Log in the database as defined by the config
522
606
 
@@ -544,7 +628,7 @@ class BaseRunLogStore(ABC, BaseModel):
544
628
  run_log.status = status
545
629
  self.put_run_log(run_log)
546
630
 
547
- def get_parameters(self, run_id: str, **kwargs) -> Dict[str, Parameter]:
631
+ def get_parameters(self, run_id: str) -> Dict[str, Parameter]:
548
632
  """
549
633
  Get the parameters from the Run log defined by the run_id
550
634
 
@@ -563,7 +647,7 @@ class BaseRunLogStore(ABC, BaseModel):
563
647
  run_log = self.get_run_log_by_id(run_id=run_id)
564
648
  return run_log.parameters
565
649
 
566
- def set_parameters(self, run_id: str, parameters: Dict[str, Parameter], **kwargs):
650
+ def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]):
567
651
  """
568
652
  Update the parameters of the Run log with the new parameters
569
653
 
@@ -584,7 +668,7 @@ class BaseRunLogStore(ABC, BaseModel):
584
668
  run_log.parameters.update(parameters)
585
669
  self.put_run_log(run_log=run_log)
586
670
 
587
- def get_run_config(self, run_id: str, **kwargs) -> dict:
671
+ def get_run_config(self, run_id: str) -> dict:
588
672
  """
589
673
  Given a run_id, return the run_config used to perform the run.
590
674
 
@@ -598,7 +682,7 @@ class BaseRunLogStore(ABC, BaseModel):
598
682
  run_log = self.get_run_log_by_id(run_id=run_id)
599
683
  return run_log.run_config
600
684
 
601
- def set_run_config(self, run_id: str, run_config: dict, **kwargs):
685
+ def set_run_config(self, run_id: str, run_config: dict):
602
686
  """Set the run config used to run the run_id
603
687
 
604
688
  Args:
@@ -610,7 +694,7 @@ class BaseRunLogStore(ABC, BaseModel):
610
694
  run_log.run_config.update(run_config)
611
695
  self.put_run_log(run_log=run_log)
612
696
 
613
- def create_step_log(self, name: str, internal_name: str, **kwargs):
697
+ def create_step_log(self, name: str, internal_name: str):
614
698
  """
615
699
  Create a step log by the name and internal name
616
700
 
@@ -628,7 +712,7 @@ class BaseRunLogStore(ABC, BaseModel):
628
712
  logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
629
713
  return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)
630
714
 
631
- def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
715
+ def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
632
716
  """
633
717
  Get a step log from the datastore for run_id and the internal naming of the step log
634
718
 
@@ -657,7 +741,7 @@ class BaseRunLogStore(ABC, BaseModel):
657
741
  step_log, _ = run_log.search_step_by_internal_name(internal_name)
658
742
  return step_log
659
743
 
660
- def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
744
+ def add_step_log(self, step_log: StepLog, run_id: str):
661
745
  """
662
746
  Add the step log in the run log as identified by the run_id in the datastore
663
747
 
@@ -687,7 +771,7 @@ class BaseRunLogStore(ABC, BaseModel):
687
771
  branch.steps[step_log.internal_name] = step_log
688
772
  self.put_run_log(run_log=run_log)
689
773
 
690
- def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
774
+ def create_branch_log(self, internal_branch_name: str) -> BranchLog:
691
775
  """
692
776
  Creates a uncommitted branch log object by the internal name given
693
777
 
@@ -704,7 +788,7 @@ class BaseRunLogStore(ABC, BaseModel):
704
788
  return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)
705
789
 
706
790
  def get_branch_log(
707
- self, internal_branch_name: str, run_id: str, **kwargs
791
+ self, internal_branch_name: str, run_id: str
708
792
  ) -> Union[BranchLog, RunLog]:
709
793
  """
710
794
  Returns the branch log by the internal branch name for the run id
@@ -724,9 +808,7 @@ class BaseRunLogStore(ABC, BaseModel):
724
808
  branch, _ = run_log.search_branch_by_internal_name(internal_branch_name)
725
809
  return branch
726
810
 
727
- def add_branch_log(
728
- self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs
729
- ):
811
+ def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str):
730
812
  """
731
813
  The method should:
732
814
  # Get the run log
@@ -758,8 +840,7 @@ class BaseRunLogStore(ABC, BaseModel):
758
840
  step.branches[internal_branch_name] = branch_log # type: ignore
759
841
  self.put_run_log(run_log)
760
842
 
761
- #
762
- def create_code_identity(self, **kwargs) -> CodeIdentity:
843
+ def create_code_identity(self) -> CodeIdentity:
763
844
  """
764
845
  Creates an uncommitted Code identity class
765
846
 
@@ -769,7 +850,7 @@ class BaseRunLogStore(ABC, BaseModel):
769
850
  logger.info(f"{self.service_name} Creating Code identity")
770
851
  return CodeIdentity()
771
852
 
772
- def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
853
+ def create_data_catalog(self, name: str) -> DataCatalog:
773
854
  """
774
855
  Create a uncommitted data catalog object
775
856
 
@@ -782,6 +863,45 @@ class BaseRunLogStore(ABC, BaseModel):
782
863
  logger.info(f"{self.service_name} Creating Data Catalog for {name}")
783
864
  return DataCatalog(name=name)
784
865
 
866
+ def create_job_log(self) -> JobLog:
867
+ """
868
+ Creates an uncommitted Job log with status CREATED; it is not persisted by this method
869
+
870
+ Use add_job_log to attach the job log to a run log
871
+ """
872
+ logger.info(f"{self.service_name} Creating a Job Log and adding it to DB")
873
+ return JobLog(status=defaults.CREATED)
874
+
875
+ def get_job_log(self, run_id: str) -> JobLog:
876
+ """
877
+ Returns the job log attached to the run log identified by run_id
878
+
879
+ Raises JobLogNotFoundError if the run log has no job log
880
+ """
881
+ logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
882
+ run_log = self.get_run_log_by_id(run_id)
883
+
884
+ try:
885
+ assert run_log.job
886
+ except AssertionError as exc:
887
+ raise exceptions.JobLogNotFoundError(run_id) from exc
888
+
889
+ return run_log.job
890
+
891
+ def add_job_log(self, run_id: str, job_log: JobLog):
892
+ """
893
+ Adds the job log to the run log
894
+
895
+ Args:
896
+ run_id (str): The run_id of the run
897
+ job_log (JobLog): The job log to add to the run log
898
+ """
899
+ logger.info(f"{self.service_name} Adding the job log to DB for: {run_id}")
900
+ run_log = self.get_run_log_by_id(run_id=run_id)
901
+ run_log.job = job_log
902
+ run_log.status = job_log.status
903
+ self.put_run_log(run_log=run_log)
904
+
785
905
 
786
906
  class BufferRunLogstore(BaseRunLogStore):
787
907
  """
@@ -804,9 +924,13 @@ class BufferRunLogstore(BaseRunLogStore):
804
924
  """
805
925
 
806
926
  service_name: str = "buffered"
927
+
807
928
  run_log: Optional[RunLog] = Field(
808
929
  default=None, exclude=True
809
930
  ) # For a buffered Run Log, this is the database
931
+ job_log: Optional[JobLog] = Field(
932
+ default=None, exclude=True
933
+ ) # For a buffered Job Log, this is the database
810
934
 
811
935
  def get_summary(self) -> Dict[str, Any]:
812
936
  summary = {"Type": self.service_name, "Location": "Not persisted"}
@@ -821,7 +945,6 @@ class BufferRunLogstore(BaseRunLogStore):
821
945
  tag: str = "",
822
946
  original_run_id: str = "",
823
947
  status: str = defaults.CREATED,
824
- **kwargs,
825
948
  ) -> RunLog:
826
949
  """
827
950
  # Creates a Run log
@@ -840,7 +963,7 @@ class BufferRunLogstore(BaseRunLogStore):
840
963
  )
841
964
  return self.run_log
842
965
 
843
- def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
966
+ def get_run_log_by_id(self, run_id: str, full: bool = False):
844
967
  """
845
968
  # Returns the run_log defined by id
846
969
  # Raises Exception if not found
@@ -852,7 +975,7 @@ class BufferRunLogstore(BaseRunLogStore):
852
975
 
853
976
  raise exceptions.RunLogNotFoundError(run_id)
854
977
 
855
- def put_run_log(self, run_log: RunLog, **kwargs):
978
+ def put_run_log(self, run_log: RunLog):
856
979
  """
857
980
  # Puts the run log in the db
858
981
  # Raises Exception if not found
runnable/defaults.py CHANGED
@@ -1,6 +1,11 @@
1
- from enum import Enum
2
- from typing import TypedDict # type: ignore[unused-ignore]
3
- from typing import Any, Dict, Mapping, Optional, Union
1
+ from typing import (
2
+ Any,
3
+ Dict,
4
+ Mapping,
5
+ Optional,
6
+ TypedDict, # type: ignore[unused-ignore]
7
+ Union,
8
+ )
4
9
 
5
10
  from rich.style import Style
6
11
  from typing_extensions import TypeAlias
@@ -12,16 +17,6 @@ LOGGER_NAME = "runnable"
12
17
  LOG_LEVEL = "WARNING"
13
18
 
14
19
 
15
- class EXECUTION_PLAN(Enum):
16
- """
17
- The possible execution plans for a runnable job.
18
- """
19
-
20
- CHAINED = "chained" #  121 relationship between run log and the dag.
21
- UNCHAINED = "unchained" # Only captures execution of steps, no relation.
22
- INTERACTIVE = "interactive" # used for interactive sessions
23
-
24
-
25
20
  # Type definitions
26
21
  class ServiceConfig(TypedDict):
27
22
  type: str
@@ -32,7 +27,7 @@ class RunnableConfig(TypedDict, total=False):
32
27
  run_log_store: Optional[ServiceConfig]
33
28
  secrets: Optional[ServiceConfig]
34
29
  catalog: Optional[ServiceConfig]
35
- executor: Optional[ServiceConfig]
30
+ pipeline_executor: Optional[ServiceConfig]
36
31
  pickler: Optional[ServiceConfig]
37
32
 
38
33
 
@@ -45,7 +40,6 @@ RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
45
40
 
46
41
  # Interaction settings
47
42
  TRACK_PREFIX = "RUNNABLE_TRACK_"
48
- STEP_INDICATOR = "_STEP_"
49
43
  PARAMETER_PREFIX = "RUNNABLE_PRM_"
50
44
  MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
51
45
  VARIABLE_PREFIX = "RUNNABLE_VAR_"
@@ -66,18 +60,14 @@ TRIGGERED = "TRIGGERED"
66
60
 
67
61
  # Node and Command settings
68
62
  COMMAND_TYPE = "python"
69
- NODE_SPEC_FILE = "node_spec.yaml"
70
63
  COMMAND_FRIENDLY_CHARACTER = "%"
71
- DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
72
- DEFAULT_CONTAINER_DATA_PATH = "data/"
73
- DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
74
64
 
75
65
  # Default services
76
- DEFAULT_EXECUTOR = ServiceConfig(type="local", config={})
66
+ DEFAULT_PIPELINE_EXECUTOR = ServiceConfig(type="local", config={})
67
+ DEFAULT_JOB_EXECUTOR = ServiceConfig(type="local", config={})
77
68
  DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
78
69
  DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
79
70
  DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
80
- DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
81
71
  DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})
82
72
 
83
73
  # Map state
@@ -109,41 +99,10 @@ COMPUTE_DATA_FOLDER = "."
109
99
  # Secrets settings
110
100
  DOTENV_FILE_LOCATION = ".env"
111
101
 
112
-
113
- # Docker settings
114
- DOCKERFILE_NAME = "Dockerfile"
115
- DOCKERFILE_CONTENT = r"""# Python 3.8 Image without Dependecies
116
- FROM python:3.8
117
-
118
- LABEL maintainer="mesanthu@gmail.com"
119
-
120
- RUN apt-get update && apt-get install -y --no-install-recommends \
121
- git \
122
- && rm -rf /var/lib/apt/lists/*
123
-
124
- ${INSTALL_STYLE}
125
-
126
- ENV VIRTUAL_ENV=/opt/venv
127
- RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV
128
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
129
-
130
- ${COPY_CONTENT}
131
- WORKDIR /app
132
-
133
- ${INSTALL_REQUIREMENTS}
134
- """
135
- GIT_ARCHIVE_NAME = "git_tracked"
136
102
  LEN_SHA_FOR_TAG = 8
137
103
 
138
-
139
- class ENTRYPOINT(Enum):
140
- """
141
- The possible container entrypoint types.
142
- """
143
-
144
- USER = "user"
145
- SYSTEM = "system"
146
-
104
+ # JOB CONFIG
105
+ DEFAULT_JOB_NAME = "job"
147
106
 
148
107
  ## Logging settings
149
108