PyPI - runnable - Version diff - 0.18.0 (tar.gz) → 0.19.0 (tar.gz) - Mend

runnable 0.18.0 (tar.gz) → 0.19.0 (tar.gz)

Files changed (58)

{runnable-0.18.0 → runnable-0.19.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.18.0
+Version: 0.19.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE

{runnable-0.18.0 → runnable-0.19.0}/extensions/job_executor/k8s.py RENAMED Viewed

@@ -101,23 +101,40 @@ class HostPath(BaseModel):
     path: str
-class Volume(BaseModel):
+class HostPathVolume(BaseModel):
     name: str
     host_path: HostPath
-class TemplateSpec(BaseModel):
+class PVCClaim(BaseModel):
+    claim_name: str
+    model_config = ConfigDict(
+        alias_generator=to_camel,
+        populate_by_name=True,
+        from_attributes=True,
+    )
+class PVCVolume(BaseModel):
+    name: str
+    persistent_volume_claim: PVCClaim
+class K8sTemplateSpec(BaseModel):
     active_deadline_seconds: int = Field(default=60 * 60 * 2)  # 2 hours
     node_selector: Optional[dict[str, str]] = None
     tolerations: Optional[list[dict[str, str]]] = None
-    volumes: Optional[list[Volume]] = Field(default_factory=lambda: [])
+    volumes: Optional[list[HostPathVolume | PVCVolume]] = Field(
+        default_factory=lambda: []
+    )
     service_account_name: Optional[str] = "default"
     restart_policy: RestartPolicy = RestartPolicy.NEVER
     container: Container
-class Template(BaseModel):
-    spec: TemplateSpec
+class K8sTemplate(BaseModel):
+    spec: K8sTemplateSpec
     metadata: Optional[ObjectMetaData] = None
@@ -125,32 +142,25 @@ class Spec(BaseModel):
     active_deadline_seconds: Optional[int] = Field(default=60 * 60 * 2)  # 2 hours
     backoff_limit: int = 6
     selector: Optional[LabelSelector] = None
-    template: Template
+    template: K8sTemplate
     ttl_seconds_after_finished: Optional[int] = Field(default=60 * 60 * 24)  # 24 hours
-class K8sJobExecutor(GenericJobExecutor):
+class GenericK8sJobExecutor(GenericJobExecutor):
     service_name: str = "k8s-job"
     config_path: Optional[str] = None
     job_spec: Spec
     mock: bool = False
-    # The location the mount of .run_log_store is mounted to in minikube
-    # ensure that minikube mount $HOME/workspace/runnable/.run_log_store:/volume/run_logs is executed first
-    # $HOME/workspace/runnable/.catalog:/volume/catalog
-    # Ensure that the docker build is done with eval $(minikube docker-env)
-    mini_k8s_run_log_location: str = Field(default="/volume/run_logs/")
-    mini_k8s_catalog_location: str = Field(default="/volume/catalog/")
+    namespace: str = Field(default="default")
     _is_local: bool = PrivateAttr(default=False)
+    _volume_mounts: list[VolumeMount] = PrivateAttr(default_factory=lambda: [])
+    _volumes: list[HostPathVolume | PVCVolume] = PrivateAttr(default_factory=lambda: [])
     _container_log_location: str = PrivateAttr(default="/tmp/run_logs/")
     _container_catalog_location: str = PrivateAttr(default="/tmp/catalog/")
     _container_secrets_location: str = PrivateAttr(default="/tmp/dotenv")
-    _volumes: list[Volume] = []
-    _volume_mounts: list[VolumeMount] = []
     model_config = ConfigDict(
         alias_generator=to_camel,
         populate_by_name=True,
@@ -287,14 +297,17 @@ class K8sJobExecutor(GenericJobExecutor):
         )
         logger.info(f"Submitting job: {job.__dict__}")
+        if self.mock:
+            print(job.__dict__)
+            return
         try:
             k8s_batch = self._client.BatchV1Api()
             response = k8s_batch.create_namespaced_job(
                 body=job,
-                namespace="default",
                 _preload_content=False,
                 pretty=True,
+                namespace=self.namespace,
             )
             logger.debug(f"Kubernetes job response: {response}")
         except Exception as e:
@@ -302,6 +315,43 @@ class K8sJobExecutor(GenericJobExecutor):
             print(e)
             raise
+    def _create_volumes(self): ...
+    def _use_volumes(self):
+        match self._context.run_log_store.service_name:
+            case "file-system":
+                self._context.run_log_store.log_folder = self._container_log_location
+            case "chunked-fs":
+                self._context.run_log_store.log_folder = self._container_log_location
+        match self._context.catalog_handler.service_name:
+            case "file-system":
+                self._context.catalog_handler.catalog_location = (
+                    self._container_catalog_location
+                )
+class MiniK8sJobExecutor(GenericK8sJobExecutor):
+    service_name: str = "k8s-job"
+    config_path: Optional[str] = None
+    job_spec: Spec
+    mock: bool = False
+    # The location the mount of .run_log_store is mounted to in minikube
+    # ensure that minikube mount $HOME/workspace/runnable/.run_log_store:/volume/run_logs is executed first
+    # $HOME/workspace/runnable/.catalog:/volume/catalog
+    # Ensure that the docker build is done with eval $(minikube docker-env)
+    mini_k8s_run_log_location: str = Field(default="/volume/run_logs/")
+    mini_k8s_catalog_location: str = Field(default="/volume/catalog/")
+    _is_local: bool = PrivateAttr(default=False)
+    model_config = ConfigDict(
+        alias_generator=to_camel,
+        populate_by_name=True,
+        from_attributes=True,
+    )
     def _create_volumes(self):
         match self._context.run_log_store.service_name:
             case "file-system":
@@ -311,7 +361,7 @@ class K8sJobExecutor(GenericJobExecutor):
                     # You then are creating a volume that is mounted to /tmp/run_logs in the container
                     # You are then referring to it.
                     # https://stackoverflow.com/questions/57411456/minikube-mounted-host-folders-are-not-working
-                    Volume(
+                    HostPathVolume(
                         name="run-logs",
                         host_path=HostPath(path=self.mini_k8s_run_log_location),
                     )
@@ -323,7 +373,7 @@ class K8sJobExecutor(GenericJobExecutor):
                 )
             case "chunked-fs":
                 self._volumes.append(
-                    Volume(
+                    HostPathVolume(
                         name="run-logs",
                         host_path=HostPath(path=self.mini_k8s_run_log_location),
                     )
@@ -337,7 +387,7 @@ class K8sJobExecutor(GenericJobExecutor):
         match self._context.catalog_handler.service_name:
             case "file-system":
                 self._volumes.append(
-                    Volume(
+                    HostPathVolume(
                         name="catalog",
                         host_path=HostPath(path=self.mini_k8s_catalog_location),
                     )
@@ -348,15 +398,87 @@ class K8sJobExecutor(GenericJobExecutor):
                     )
                 )
-    def _use_volumes(self):
+class K8sJobExecutor(GenericK8sJobExecutor):
+    service_name: str = "k8s-job"
+    config_path: Optional[str] = None
+    job_spec: Spec
+    mock: bool = False
+    pvc_claim_name: str
+    # change the spec to pull image if not present
+    def model_post_init(self, __context):
+        self.job_spec.template.spec.container.image_pull_policy = ImagePullPolicy.ALWAYS
+    _is_local: bool = PrivateAttr(default=False)
+    model_config = ConfigDict(
+        alias_generator=to_camel,
+        populate_by_name=True,
+        from_attributes=True,
+    )
+    def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
+        self._use_volumes()
+        self._set_up_run_log()
+        job_log = self._context.run_log_store.create_job_log()
+        self._context.run_log_store.add_job_log(
+            run_id=self._context.run_id, job_log=job_log
+        )
+        job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)
+        attempt_log = job.execute_command(
+            attempt_number=self.step_attempt_number,
+            mock=self.mock,
+        )
+        job_log.status = attempt_log.status
+        job_log.attempts.append(attempt_log)
+        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
+            catalog_settings=catalog_settings
+        )
+        logger.debug(f"data_catalogs_put: {data_catalogs_put}")
+        job_log.add_data_catalogs(data_catalogs_put or [])
+        console.print("Summary of job")
+        console.print(job_log.get_summary())
+        self._context.run_log_store.add_job_log(
+            run_id=self._context.run_id, job_log=job_log
+        )
+    def _create_volumes(self):
+        self._volumes.append(
+            PVCVolume(
+                name=self.pvc_claim_name,
+                persistent_volume_claim=PVCClaim(claim_name=self.pvc_claim_name),
+            )
+        )
         match self._context.run_log_store.service_name:
             case "file-system":
-                self._context.run_log_store.log_folder = self._container_log_location
+                self._volume_mounts.append(
+                    VolumeMount(
+                        name=self.pvc_claim_name,
+                        mount_path=self._container_log_location,
+                    )
+                )
             case "chunked-fs":
-                self._context.run_log_store.log_folder = self._container_log_location
+                self._volume_mounts.append(
+                    VolumeMount(
+                        name=self.pvc_claim_name,
+                        mount_path=self._container_log_location,
+                    )
+                )
         match self._context.catalog_handler.service_name:
             case "file-system":
-                self._context.catalog_handler.catalog_location = (
-                    self._container_catalog_location
+                self._volume_mounts.append(
+                    VolumeMount(
+                        name=self.pvc_claim_name,
+                        mount_path=self._container_catalog_location,
+                    )
                 )

{runnable-0.18.0 → runnable-0.19.0}/extensions/pipeline_executor/local_container.py RENAMED Viewed

@@ -268,7 +268,6 @@ class LocalContainerExecutor(GenericPipelineExecutor):
                     f"Please provide a docker_image using executor_config of the step {node.name} or at global config"
                 )
-            # TODO: Should consider using getpass.getuser() when running the docker container? Volume permissions
             container = client.containers.create(
                 image=docker_image,
                 command=command,

{runnable-0.18.0 → runnable-0.19.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "runnable"
-version = "0.18.0"
+version = "0.19.0"
 description = "Add your description here"
 readme = "README.md"
 authors = [
@@ -98,6 +98,7 @@ include = [
 [project.entry-points.'job_executor']
 "local" = "extensions.job_executor.local:LocalJobExecutor"
 "local-container" = "extensions.job_executor.local_container:LocalContainerJobExecutor"
+"mini-k8s-job" = "extensions.job_executor.k8s:MiniK8sJobExecutor"
 "k8s-job" = "extensions.job_executor.k8s:K8sJobExecutor"
 # "argo" = "extensions.pipeline_executor.argo:ArgoExecutor"
 # "mocked" = "extensions.pipeline_executor.mocked:MockedExecutor"

{runnable-0.18.0 → runnable-0.19.0}/runnable/__init__.py RENAMED Viewed

@@ -1,5 +1,6 @@
 # ruff: noqa
 import logging
 import os
 from logging.config import dictConfig

{runnable-0.18.0 → runnable-0.19.0}/runnable/catalog.py RENAMED Viewed

@@ -10,8 +10,6 @@ from runnable.datastore import DataCatalog
 logger = logging.getLogger(defaults.LOGGER_NAME)
-# TODO: Should ** be allowed as glob pattern as it can potentially copy everything to catalog
 def is_catalog_out_of_sync(
     catalog, synced_catalogs=Optional[List[DataCatalog]]
@@ -170,3 +168,4 @@ class DoNothingCatalog(BaseCatalog):
         Does nothing
         """
         logger.info("Using a do-nothing catalog, doing nothing while sync between runs")
+        logger.info("Using a do-nothing catalog, doing nothing while sync between runs")

{runnable-0.18.0 → runnable-0.19.0}/runnable/entrypoints.py RENAMED Viewed

@@ -16,9 +16,6 @@ from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 logger = logging.getLogger(defaults.LOGGER_NAME)
-print("")  # removes the buffer print
 def get_default_configs() -> RunnableConfig:
     """
     User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
@@ -128,11 +125,10 @@ def prepare_configurations(
             "job-executor", None
         )  # type: ignore
         if not job_executor_config:
-            executor_config = cast(
+            job_executor_config = cast(
                 ServiceConfig,
                 runnable_defaults.get("job-executor", defaults.DEFAULT_JOB_EXECUTOR),
             )
         assert job_executor_config, "Job executor is not provided"
         configured_executor = utils.get_provider_by_name_and_type(
             "job_executor", job_executor_config

{runnable-0.18.0 → runnable-0.19.0}/runnable/executor.py RENAMED Viewed

@@ -11,9 +11,9 @@ import runnable.context as context
 from runnable import defaults
 from runnable.datastore import DataCatalog, JobLog, StepLog
 from runnable.defaults import TypeMapVariable
-from runnable.graph import Graph
 if TYPE_CHECKING:  # pragma: no cover
+    from runnable.graph import Graph
     from runnable.nodes import BaseNode
     from runnable.tasks import BaseTaskType

{runnable-0.18.0 → runnable-0.19.0}/runnable/parameters.py RENAMED Viewed

@@ -15,8 +15,6 @@ from runnable.utils import remove_prefix
 logger = logging.getLogger(defaults.LOGGER_NAME)
-# TODO: Revisit this, it might be a bit too complicated than required
 def get_user_set_parameters(remove: bool = False) -> Dict[str, JsonParameter]:
     """
@@ -50,13 +48,6 @@ def get_user_set_parameters(remove: bool = False) -> Dict[str, JsonParameter]:
     return parameters
-def serialize_parameter_as_str(value: Any) -> str:
-    if isinstance(value, BaseModel):
-        return json.dumps(value.model_dump())
-    return json.dumps(value)
 def filter_arguments_for_func(
     func: Callable[..., Any],
     params: Dict[str, Any],

{runnable-0.18.0 → runnable-0.19.0}/runnable/utils.py RENAMED Viewed

@@ -17,7 +17,7 @@ from ruamel.yaml import YAML
 from stevedore import driver
 import runnable.context as context
-from runnable import defaults, names
+from runnable import console, defaults, names
 from runnable.defaults import TypeMapVariable
 if TYPE_CHECKING:  # pragma: no cover
@@ -176,7 +176,7 @@ def is_a_git_repo() -> bool:
         logger.info("Found the code to be git versioned")
         return True
     except BaseException:  # pylint: disable=W0702
-        logger.error("No git repo found, unsafe hash")
+        console.print("Not a git repo", style="bold red")
     return False
@@ -195,27 +195,7 @@ def get_current_code_commit() -> Union[str, None]:
         logger.info("Found the git commit to be: %s", label)
         return label
     except BaseException:  # pylint: disable=W0702
-        logger.exception("Error getting git hash")
-        raise
-def archive_git_tracked(name: str):
-    """Generate a git archive of the tracked files.
-    Args:
-        name (str): The name to give the archive
-    Raises:
-        Exception: If its not a git repo
-    """
-    command = f"git archive -v -o {name}.tar.gz --format=tar.gz HEAD"
-    if not is_a_git_repo():
-        raise Exception("Not a git repo")
-    try:
-        subprocess.check_output(command.split()).strip().decode("utf-8")
-    except BaseException:  # pylint: disable=W0702
-        logger.exception("Error archiving repo")
+        console.print("Not a git repo, error getting hash", style="bold red")
         raise
@@ -234,7 +214,7 @@ def is_git_clean() -> Tuple[bool, Union[None, str]]:
             return True, None
         return False, label
     except BaseException:  # pylint: disable=W0702
-        logger.exception("Error checking if the code is git clean")
+        console.print("Not a git repo, not clean", style="bold red")
     return False, None
@@ -253,7 +233,7 @@ def get_git_remote() -> Union[str, None]:
         logger.info("Found the git remote to be: %s", label)
         return label
     except BaseException:  # pylint: disable=W0702
-        logger.exception("Error getting git remote")
+        console.print("Not a git repo, no remote", style="bold red")
         raise