runnable 0.36.1__tar.gz → 0.38.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {runnable-0.36.1 → runnable-0.38.0}/.gitignore +1 -0
  2. {runnable-0.36.1 → runnable-0.38.0}/PKG-INFO +2 -4
  3. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/any_path.py +13 -2
  4. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/__init__.py +4 -1
  5. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/__init__.py +3 -1
  6. {runnable-0.36.1 → runnable-0.38.0}/pyproject.toml +6 -5
  7. {runnable-0.36.1 → runnable-0.38.0}/runnable/catalog.py +5 -2
  8. {runnable-0.36.1 → runnable-0.38.0}/runnable/context.py +1 -0
  9. {runnable-0.36.1 → runnable-0.38.0}/runnable/graph.py +1 -1
  10. {runnable-0.36.1 → runnable-0.38.0}/runnable/nodes.py +2 -0
  11. runnable-0.38.0/runnable/parameters.py +215 -0
  12. {runnable-0.36.1 → runnable-0.38.0}/runnable/sdk.py +8 -0
  13. runnable-0.36.1/runnable/parameters.py +0 -144
  14. {runnable-0.36.1 → runnable-0.38.0}/LICENSE +0 -0
  15. {runnable-0.36.1 → runnable-0.38.0}/README.md +0 -0
  16. {runnable-0.36.1 → runnable-0.38.0}/extensions/README.md +0 -0
  17. {runnable-0.36.1 → runnable-0.38.0}/extensions/__init__.py +0 -0
  18. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/README.md +0 -0
  19. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/file_system.py +0 -0
  20. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/minio.py +0 -0
  21. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/pyproject.toml +0 -0
  22. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/s3.py +0 -0
  23. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/README.md +0 -0
  24. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/emulate.py +0 -0
  25. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/k8s.py +0 -0
  26. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/k8s_job_spec.yaml +0 -0
  27. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/local.py +0 -0
  28. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/local_container.py +0 -0
  29. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/pyproject.toml +0 -0
  30. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/README.md +0 -0
  31. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/__init__.py +0 -0
  32. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/conditional.py +0 -0
  33. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/fail.py +0 -0
  34. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/map.py +0 -0
  35. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/parallel.py +0 -0
  36. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/pyproject.toml +0 -0
  37. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/stub.py +0 -0
  38. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/success.py +0 -0
  39. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/task.py +0 -0
  40. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/README.md +0 -0
  41. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/argo.py +0 -0
  42. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/emulate.py +0 -0
  43. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/local.py +0 -0
  44. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/local_container.py +0 -0
  45. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/mocked.py +0 -0
  46. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/pyproject.toml +0 -0
  47. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/retry.py +0 -0
  48. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/README.md +0 -0
  49. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/__init__.py +0 -0
  50. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/any_path.py +0 -0
  51. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/chunked_fs.py +0 -0
  52. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/chunked_minio.py +0 -0
  53. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/db/implementation_FF.py +0 -0
  54. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/db/integration_FF.py +0 -0
  55. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/file_system.py +0 -0
  56. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/generic_chunked.py +0 -0
  57. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/minio.py +0 -0
  58. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/pyproject.toml +0 -0
  59. {runnable-0.36.1 → runnable-0.38.0}/extensions/secrets/README.md +0 -0
  60. {runnable-0.36.1 → runnable-0.38.0}/extensions/secrets/dotenv.py +0 -0
  61. {runnable-0.36.1 → runnable-0.38.0}/extensions/secrets/pyproject.toml +0 -0
  62. {runnable-0.36.1 → runnable-0.38.0}/runnable/__init__.py +0 -0
  63. {runnable-0.36.1 → runnable-0.38.0}/runnable/cli.py +0 -0
  64. {runnable-0.36.1 → runnable-0.38.0}/runnable/datastore.py +0 -0
  65. {runnable-0.36.1 → runnable-0.38.0}/runnable/defaults.py +0 -0
  66. {runnable-0.36.1 → runnable-0.38.0}/runnable/entrypoints.py +0 -0
  67. {runnable-0.36.1 → runnable-0.38.0}/runnable/exceptions.py +0 -0
  68. {runnable-0.36.1 → runnable-0.38.0}/runnable/executor.py +0 -0
  69. {runnable-0.36.1 → runnable-0.38.0}/runnable/names.py +0 -0
  70. {runnable-0.36.1 → runnable-0.38.0}/runnable/pickler.py +0 -0
  71. {runnable-0.36.1 → runnable-0.38.0}/runnable/secrets.py +0 -0
  72. {runnable-0.36.1 → runnable-0.38.0}/runnable/tasks.py +0 -0
  73. {runnable-0.36.1 → runnable-0.38.0}/runnable/utils.py +0 -0
--- runnable-0.36.1/.gitignore
+++ runnable-0.38.0/.gitignore
@@ -157,3 +157,4 @@ cov.xml
 data/
 
 minikube/
+.pth # For model saving and loading
--- runnable-0.36.1/PKG-INFO
+++ runnable-0.38.0/PKG-INFO
@@ -1,12 +1,10 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.36.1
+Version: 0.38.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE
 Requires-Python: >=3.10
-Requires-Dist: click-plugins>=1.1.1
-Requires-Dist: click<=8.1.3
 Requires-Dist: cloudpathlib>=0.20.0
 Requires-Dist: dill>=0.3.9
 Requires-Dist: pydantic>=2.10.3
@@ -15,7 +13,7 @@ Requires-Dist: rich>=13.9.4
 Requires-Dist: ruamel-yaml>=0.18.6
 Requires-Dist: setuptools>=75.6.0
 Requires-Dist: stevedore>=5.4.0
-Requires-Dist: typer>=0.15.1
+Requires-Dist: typer>=0.17.3
 Provides-Extra: docker
 Requires-Dist: docker>=7.1.0; extra == 'docker'
 Provides-Extra: examples
--- runnable-0.36.1/extensions/catalog/any_path.py
+++ runnable-0.38.0/extensions/catalog/any_path.py
@@ -95,7 +95,10 @@ class AnyPathCatalog(BaseCatalog):
         return data_catalogs
 
     def put(
-        self, name: str, allow_file_not_found_exc: bool = False
+        self,
+        name: str,
+        allow_file_not_found_exc: bool = False,
+        store_copy: bool = True,
     ) -> List[DataCatalog]:
         """
         Put the files matching the glob pattern into the catalog.
@@ -154,7 +157,15 @@ class AnyPathCatalog(BaseCatalog):
             data_catalogs.append(data_catalog)
 
             # TODO: Think about syncing only if the file is changed
-            self.upload_to_catalog(file)
+            if store_copy:
+                logger.debug(
+                    f"Copying file {file} to the catalog location for run_id: {run_id}"
+                )
+                self.upload_to_catalog(file)
+            else:
+                logger.debug(
+                    f"Not copying file {file} to the catalog location for run_id: {run_id}"
+                )
 
         if not data_catalogs and not allow_file_not_found_exc:
             raise Exception(f"Did not find any files matching {name} in {copy_from}")
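The net effect of this hunk: a DataCatalog entry is still recorded for every matched file, but the physical copy into the catalog location becomes opt-out. A minimal standalone sketch of the new control flow, using hypothetical stand-ins for the catalog entry and for upload_to_catalog:

import logging

logger = logging.getLogger("sketch")

def put(files: list[str], run_id: str, store_copy: bool = True) -> list[dict]:
    data_catalogs = []
    for file in files:
        data_catalogs.append({"name": file, "run_id": run_id})  # entry is recorded either way
        if store_copy:
            logger.debug(f"Copying file {file} to the catalog location for run_id: {run_id}")
            # the real implementation calls self.upload_to_catalog(file) here
        else:
            logger.debug(f"Not copying file {file} to the catalog location for run_id: {run_id}")
    return data_catalogs

print(put(["model.pth"], run_id="demo", store_copy=False))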
--- runnable-0.36.1/extensions/job_executor/__init__.py
+++ runnable-0.38.0/extensions/job_executor/__init__.py
@@ -29,6 +29,7 @@ class GenericJobExecutor(BaseJobExecutor):
     @property
     def _context(self):
         assert context.run_context
+        assert isinstance(context.run_context, context.JobContext)
        return context.run_context

    def _get_parameters(self) -> Dict[str, JsonParameter]:
@@ -147,7 +148,9 @@ class GenericJobExecutor(BaseJobExecutor):
         data_catalogs = []
         for name_pattern in catalog_settings:
             data_catalog = self._context.catalog.put(
-                name=name_pattern, allow_file_not_found_exc=allow_file_not_found_exc
+                name=name_pattern,
+                allow_file_not_found_exc=allow_file_not_found_exc,
+                store_copy=self._context.catalog_store_copy,
             )
 
             logger.debug(f"Added data catalog: {data_catalog} to job log")
--- runnable-0.36.1/extensions/pipeline_executor/__init__.py
+++ runnable-0.38.0/extensions/pipeline_executor/__init__.py
@@ -160,7 +160,9 @@ class GenericPipelineExecutor(BasePipelineExecutor):
 
             elif stage == "put":
                 data_catalog = self._context.catalog.put(
-                    name=name_pattern, allow_file_not_found_exc=allow_file_no_found_exc
+                    name=name_pattern,
+                    allow_file_not_found_exc=allow_file_no_found_exc,
+                    store_copy=node_catalog_settings.get("store_copy", True),
                 )
             else:
                 raise Exception(f"Stage {stage} not supported")
--- runnable-0.36.1/pyproject.toml
+++ runnable-0.38.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "runnable"
-version = "0.36.1"
+version = "0.38.0"
 description = "Add your description here"
 readme = "README.md"
 authors = [
@@ -8,8 +8,6 @@ authors = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "click-plugins>=1.1.1",
-    "click<=8.1.3",
     "pydantic>=2.10.3",
     "ruamel-yaml>=0.18.6",
     "stevedore>=5.4.0",
@@ -17,7 +15,7 @@ dependencies = [
     "dill>=0.3.9",
     "setuptools>=75.6.0",
     "python-dotenv>=1.0.1",
-    "typer>=0.15.1",
+    "typer>=0.17.3",
     "cloudpathlib>=0.20.0",
 ]
 
@@ -58,6 +56,9 @@ docs = [
 release = [
     "python-semantic-release>=9.15.2",
 ]
+examples-torch = [
+    "torch>=2.7.1",
+]
 
 [tool.uv.workspace]
 members = ["extensions/catalog",
@@ -148,7 +149,7 @@ file-system = "extensions.run_log_store.file_system:FileSystemRunLogstore"
 
 # Release configuration
 [tool.semantic_release]
-commit_parser = "angular"
+commit_parser = "conventional"
 major_on_zero = true
 allow_zero_version = true
 tag_format = "{version}"
--- runnable-0.36.1/runnable/catalog.py
+++ runnable-0.38.0/runnable/catalog.py
@@ -57,7 +57,7 @@ class BaseCatalog(ABC, BaseModel):
 
     @abstractmethod
     def put(
-        self, name: str, allow_file_not_found_exc: bool = False
+        self, name: str, allow_file_not_found_exc: bool = False, store_copy: bool = True
     ) -> List[DataCatalog]:
         """
         Put the file by 'name' from the 'compute_data_folder' in the catalog for the run_id.
@@ -120,7 +120,10 @@ class DoNothingCatalog(BaseCatalog):
         return []
 
     def put(
-        self, name: str, allow_file_not_found_exc: bool = False
+        self,
+        name: str,
+        allow_file_not_found_exc: bool = False,
+        store_copy: bool = True,
     ) -> List[DataCatalog]:
         """
         Does nothing
--- runnable-0.36.1/runnable/context.py
+++ runnable-0.38.0/runnable/context.py
@@ -475,6 +475,7 @@ class JobContext(RunnableContext):
         default=None,
         description="Catalog settings to be used for the job.",
     )
+    catalog_store_copy: bool = Field(default=True, alias="catalog_store_copy")
 
     @computed_field  # type: ignore
     @cached_property
--- runnable-0.36.1/runnable/graph.py
+++ runnable-0.38.0/runnable/graph.py
@@ -329,7 +329,7 @@ def create_graph(dag_config: Dict[str, Any], internal_branch_name: str = "") ->
     Returns:
         Graph: The created graph object
     """
-    description: str = dag_config.get("description", None)
+    description: str | None = dag_config.get("description", None)
     start_at: str = cast(
         str, dag_config.get("start_at")
     )  # Let the start_at be relative to the graph
--- runnable-0.36.1/runnable/nodes.py
+++ runnable-0.38.0/runnable/nodes.py
@@ -411,11 +411,13 @@ class TraversalNode(BaseNode):
         return self.overrides.get(executor_type) or ""
 
 
+# Unfortunately, this is defined in 2 places. Look in SDK
 class CatalogStructure(BaseModel):
     model_config = ConfigDict(extra="forbid")  # Need to forbid
 
     get: List[str] = Field(default_factory=list)
     put: List[str] = Field(default_factory=list)
+    store_copy: bool = Field(default=True, alias="store_copy")
 
 
 class ExecutableNode(TraversalNode):
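Because CatalogStructure keeps extra="forbid" (unchanged here), the new store_copy key is accepted while any misspelling of it fails validation instead of being silently dropped. A quick sketch, assuming runnable 0.38.0 is installed and that the model is importable from runnable.nodes as the file list suggests:

from pydantic import ValidationError

from runnable.nodes import CatalogStructure

settings = CatalogStructure(put=["model/*.pth"], store_copy=False)
print(settings.model_dump())  # {'get': [], 'put': ['model/*.pth'], 'store_copy': False}

try:
    CatalogStructure(put=["model/*.pth"], store_copies=False)  # typo in the key
except ValidationError as e:
    print(f"rejected with {e.error_count()} error(s)")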
--- /dev/null
+++ runnable-0.38.0/runnable/parameters.py
@@ -0,0 +1,215 @@
+import argparse
+import inspect
+import json
+import logging
+import os
+from typing import Any, Dict, Type
+
+from pydantic import BaseModel, ConfigDict
+from typing_extensions import Callable
+
+from runnable import defaults
+from runnable.datastore import JsonParameter, ObjectParameter
+from runnable.defaults import MapVariableType
+from runnable.utils import remove_prefix
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+def get_user_set_parameters(remove: bool = False) -> Dict[str, JsonParameter]:
+    """
+    Scans the environment variables for any user returned parameters that have a prefix runnable_PRM_.
+
+    This function does not deal with any type conversion of the parameters.
+    It just deserializes the parameters and returns them as a dictionary.
+
+    Args:
+        remove (bool, optional): Flag to remove the parameter if needed. Defaults to False.
+
+    Returns:
+        dict: The dictionary of found user returned parameters
+    """
+    parameters: Dict[str, JsonParameter] = {}
+    for env_var, value in os.environ.items():
+        if env_var.startswith(defaults.PARAMETER_PREFIX):
+            key = remove_prefix(env_var, defaults.PARAMETER_PREFIX)
+            try:
+                parameters[key.lower()] = JsonParameter(
+                    kind="json", value=json.loads(value)
+                )
+            except json.decoder.JSONDecodeError:
+                logger.warning(
+                    f"Parameter {key} could not be JSON decoded, adding the literal value"
+                )
+                parameters[key.lower()] = JsonParameter(kind="json", value=value)
+
+            if remove:
+                del os.environ[env_var]
+    return parameters
+
+
+def return_json_parameters(params: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Returns the parameters as a JSON serializable dictionary.
+
+    Args:
+        params (dict): The parameters to serialize.
+
+    Returns:
+        dict: The JSON serializable dictionary.
+    """
+    return_params = {}
+    for key, value in params.items():
+        if isinstance(value, ObjectParameter):
+            continue
+
+        return_params[key] = value.get_value()
+    return return_params
+
+
+def filter_arguments_for_func(
+    func: Callable[..., Any],
+    params: Dict[str, Any],
+    map_variable: MapVariableType = None,
+) -> Dict[str, Any]:
+    """
+    Inspects the function to be called as part of the pipeline to find the arguments of the function.
+    Matches the function arguments to the parameters available either by static parameters or by up stream steps.
+
+    The function "func" signature could be:
+    - def my_function(arg1: int, arg2: str, arg3: float):
+    - def my_function(arg1: int, arg2: str, arg3: float, **kwargs):
+        in this case, we would need to send in remaining keyword arguments as a dictionary.
+    - def my_function(arg1: int, arg2: str, arg3: float, args: argparse.Namespace):
+        In this case, we need to send the rest of the parameters as attributes of the args object.
+
+    Args:
+        func (Callable): The function to inspect
+        parameters (dict): The parameters available for the run
+
+    Returns:
+        dict: The parameters matching the function signature
+    """
+    function_args = inspect.signature(func).parameters
+
+    # Update parameters with the map variables
+    for key, v in (map_variable or {}).items():
+        params[key] = JsonParameter(kind="json", value=v)
+
+    bound_args = {}
+    missing_required_args: list[str] = []
+    var_keyword_param = None
+    namespace_param = None
+
+    # First pass: Handle regular parameters and identify special parameters
+    for name, value in function_args.items():
+        # Ignore any *args
+        if value.kind == inspect.Parameter.VAR_POSITIONAL:
+            logger.warning(f"Ignoring parameter {name} as it is VAR_POSITIONAL")
+            continue
+
+        # Check for **kwargs parameter
+        if value.kind == inspect.Parameter.VAR_KEYWORD:
+            var_keyword_param = name
+            continue
+
+        # Check for argparse.Namespace parameter
+        if value.annotation == argparse.Namespace:
+            namespace_param = name
+            continue
+
+        # Handle regular parameters
+        if name not in params:
+            if value.default != inspect.Parameter.empty:
+                # Default value is given in the function signature, we can use it
+                bound_args[name] = value.default
+            else:
+                # This is a required parameter that's missing
+                missing_required_args.append(name)
+        else:
+            # We have a parameter of this name, lets bind it
+            param_value = params[name]
+
+            if (
+                inspect.isclass(value.annotation)
+                and issubclass(value.annotation, BaseModel)
+            ) and not isinstance(param_value, ObjectParameter):
+                # Even if the annotation is a pydantic model, it can be passed as an object parameter
+                # We try to cast it as a pydantic model if asked
+                named_param = params[name].get_value()
+
+                if not isinstance(named_param, dict):
+                    # A case where the parameter is a one attribute model
+                    named_param = {name: named_param}
+
+                bound_model = bind_args_for_pydantic_model(
+                    named_param, value.annotation
+                )
+                bound_args[name] = bound_model
+
+            elif value.annotation in [str, int, float, bool] and callable(
+                value.annotation
+            ):
+                # Cast it if its a primitive type. Ensure the type matches the annotation.
+                try:
+                    bound_args[name] = value.annotation(params[name].get_value())
+                except (ValueError, TypeError) as e:
+                    raise ValueError(
+                        f"Cannot cast parameter '{name}' to {value.annotation.__name__}: {e}"
+                    )
+            else:
+                # We do not know type of parameter, we send the value as found
+                bound_args[name] = params[name].get_value()
+
+    # Find extra parameters (parameters in params but not consumed by regular function parameters)
+    consumed_param_names = set(bound_args.keys()) | set(missing_required_args)
+    extra_params = {k: v for k, v in params.items() if k not in consumed_param_names}
+
+    # Second pass: Handle **kwargs and argparse.Namespace parameters
+    if var_keyword_param is not None:
+        # Function accepts **kwargs - add all extra parameters directly to bound_args
+        for param_name, param_value in extra_params.items():
+            bound_args[param_name] = param_value.get_value()
+    elif namespace_param is not None:
+        # Function accepts argparse.Namespace - create namespace with extra parameters
+        args_namespace = argparse.Namespace()
+        for param_name, param_value in extra_params.items():
+            setattr(args_namespace, param_name, param_value.get_value())
+        bound_args[namespace_param] = args_namespace
+    elif extra_params:
+        # Function doesn't accept **kwargs or namespace, but we have extra parameters
+        # This should only be an error if we also have missing required parameters
+        # or if the function truly can't handle the extra parameters
+        if missing_required_args:
+            # We have both missing required and extra parameters - this is an error
+            raise ValueError(
+                f"Function {func.__name__} has parameters {missing_required_args} that are not present in the parameters"
+            )
+        # If we only have extra parameters and no missing required ones, we just ignore the extras
+        # This allows for more flexible parameter passing
+
+    # Check for missing required parameters
+    if missing_required_args:
+        if var_keyword_param is None and namespace_param is None:
+            # No way to handle missing parameters
+            raise ValueError(
+                f"Function {func.__name__} has parameters {missing_required_args} that are not present in the parameters"
+            )
+        # If we have **kwargs or namespace, missing parameters might be handled there
+        # But if they're truly required (no default), we should still error
+        raise ValueError(
+            f"Function {func.__name__} has parameters {missing_required_args} that are not present in the parameters"
+        )
+
+    return bound_args
+
+
+def bind_args_for_pydantic_model(
+    params: Dict[str, Any], model: Type[BaseModel]
+) -> BaseModel:
+    class EasyModel(model):  # type: ignore
+        model_config = ConfigDict(extra="ignore")
+
+    swallow_all = EasyModel(**params)
+    bound_model = model(**swallow_all.model_dump())
+    return bound_model
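The docstring above names three supported signatures, and the rewrite binds declared arguments first (with primitive casting) before routing leftovers, unlike the 0.36.1 version below, which returned every parameter unprocessed as soon as **kwargs appeared. A small sketch of how each signature binds, assuming runnable 0.38.0 is installed (JsonParameter and filter_arguments_for_func are both part of this diff):

import argparse

from runnable.datastore import JsonParameter
from runnable.parameters import filter_arguments_for_func

params = {
    "x": JsonParameter(kind="json", value=1),
    "y": JsonParameter(kind="json", value="hello"),
    "extra": JsonParameter(kind="json", value=[1, 2, 3]),
}

def plain(x: int, y: str): ...                         # unmatched params are ignored
def catch_all(x: int, **kwargs): ...                   # unmatched params land in **kwargs
def namespaced(x: int, args: argparse.Namespace): ...  # unmatched params become attributes of args

print(filter_arguments_for_func(plain, dict(params)))      # {'x': 1, 'y': 'hello'}
print(filter_arguments_for_func(catch_all, dict(params)))  # also carries 'extra': [1, 2, 3]
ns = filter_arguments_for_func(namespaced, dict(params))["args"]
print(ns.y, ns.extra)                                      # hello [1, 2, 3]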
--- runnable-0.36.1/runnable/sdk.py
+++ runnable-0.38.0/runnable/sdk.py
@@ -60,6 +60,7 @@ class Catalog(BaseModel):
     Attributes:
         get (List[str]): List of glob patterns to get from central catalog to the compute data folder.
         put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
+        store_copy (bool): Whether to store a copy of the data in the central catalog.
 
     Examples:
         >>> from runnable import Catalog
@@ -74,6 +75,7 @@ class Catalog(BaseModel):
     # compute_data_folder: str = Field(default="", alias="compute_data_folder")
     get: List[str] = Field(default_factory=list, alias="get")
     put: List[str] = Field(default_factory=list, alias="put")
+    store_copy: bool = Field(default=True, alias="store_copy")
 
 
 class BaseTraversal(ABC, BaseModel):
@@ -845,6 +847,11 @@ class BaseJob(BaseModel):
             return []
         return self.catalog.put
 
+    def return_bool_catalog_store_copy(self) -> bool:
+        if self.catalog is None:
+            return True
+        return self.catalog.store_copy
+
     def _is_called_for_definition(self) -> bool:
         """
         If the run context is set, we are coming in only to get the pipeline definition.
@@ -888,6 +895,7 @@ class BaseJob(BaseModel):
         }
 
         run_context = context.JobContext.model_validate(configurations)
+        run_context.catalog_store_copy = self.return_bool_catalog_store_copy()
 
         assert isinstance(run_context.job_executor, BaseJobExecutor)
 
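At the SDK surface the whole feature is a one-line opt-out: Catalog.store_copy defaults to True (preserving the old copy-always behavior), and BaseJob threads it into JobContext.catalog_store_copy via return_bool_catalog_store_copy(). A hedged sketch, using only the import shown in the docstring's Examples block above:

from runnable import Catalog

# Track .pth model files in the run log without duplicating the bytes
# into the central catalog location.
catalog = Catalog(put=["*.pth"], store_copy=False)
print(catalog.model_dump())  # {'get': [], 'put': ['*.pth'], 'store_copy': False}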
--- runnable-0.36.1/runnable/parameters.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import inspect
-import json
-import logging
-import os
-from typing import Any, Dict, Type
-
-import pydantic
-from pydantic import BaseModel, ConfigDict
-from typing_extensions import Callable
-
-from runnable import defaults
-from runnable.datastore import JsonParameter, ObjectParameter
-from runnable.defaults import MapVariableType
-from runnable.utils import remove_prefix
-
-logger = logging.getLogger(defaults.LOGGER_NAME)
-
-
-def get_user_set_parameters(remove: bool = False) -> Dict[str, JsonParameter]:
-    """
-    Scans the environment variables for any user returned parameters that have a prefix runnable_PRM_.
-
-    This function does not deal with any type conversion of the parameters.
-    It just deserializes the parameters and returns them as a dictionary.
-
-    Args:
-        remove (bool, optional): Flag to remove the parameter if needed. Defaults to False.
-
-    Returns:
-        dict: The dictionary of found user returned parameters
-    """
-    parameters: Dict[str, JsonParameter] = {}
-    for env_var, value in os.environ.items():
-        if env_var.startswith(defaults.PARAMETER_PREFIX):
-            key = remove_prefix(env_var, defaults.PARAMETER_PREFIX)
-            try:
-                parameters[key.lower()] = JsonParameter(
-                    kind="json", value=json.loads(value)
-                )
-            except json.decoder.JSONDecodeError:
-                logger.warning(
-                    f"Parameter {key} could not be JSON decoded, adding the literal value"
-                )
-                parameters[key.lower()] = JsonParameter(kind="json", value=value)
-
-            if remove:
-                del os.environ[env_var]
-    return parameters
-
-
-def filter_arguments_for_func(
-    func: Callable[..., Any],
-    params: Dict[str, Any],
-    map_variable: MapVariableType = None,
-) -> Dict[str, Any]:
-    """
-    Inspects the function to be called as part of the pipeline to find the arguments of the function.
-    Matches the function arguments to the parameters available either by command line or by up stream steps.
-
-
-    Args:
-        func (Callable): The function to inspect
-        parameters (dict): The parameters available for the run
-
-    Returns:
-        dict: The parameters matching the function signature
-    """
-    function_args = inspect.signature(func).parameters
-
-    # Update parameters with the map variables
-    for key, v in (map_variable or {}).items():
-        params[key] = JsonParameter(kind="json", value=v)
-
-    bound_args = {}
-    unassigned_params = set(params.keys())
-    # Check if VAR_KEYWORD is used, it is we send back everything
-    # If **kwargs is present in the function signature, we send back everything
-    for name, value in function_args.items():
-        if value.kind != inspect.Parameter.VAR_KEYWORD:
-            continue
-        # Found VAR_KEYWORD, we send back everything as found
-        for key, value in params.items():
-            bound_args[key] = params[key].get_value()
-
-        return bound_args
-
-    # Lets return what is asked for then!!
-    for name, value in function_args.items():
-        # Ignore any *args
-        if value.kind == inspect.Parameter.VAR_POSITIONAL:
-            logger.warning(f"Ignoring parameter {name} as it is VAR_POSITIONAL")
-            continue
-
-        if name not in params:
-            # No parameter of this name was provided
-            if value.default == inspect.Parameter.empty:
-                # No default value is given in the function signature. error as parameter is required.
-                raise ValueError(
-                    f"Parameter {name} is required for {func.__name__} but not provided"
-                )
-            # default value is given in the function signature, nothing further to do.
-            continue
-
-        param_value = params[name]
-
-        if type(value.annotation) in [
-            BaseModel,
-            pydantic._internal._model_construction.ModelMetaclass,
-        ] and not isinstance(param_value, ObjectParameter):
-            # Even if the annotation is a pydantic model, it can be passed as an object parameter
-            # We try to cast it as a pydantic model if asked
-            named_param = params[name].get_value()
-
-            if not isinstance(named_param, dict):
-                # A case where the parameter is a one attribute model
-                named_param = {name: named_param}
-
-            bound_model = bind_args_for_pydantic_model(named_param, value.annotation)
-            bound_args[name] = bound_model
-
-        elif value.annotation in [str, int, float, bool]:
-            # Cast it if its a primitive type. Ensure the type matches the annotation.
-            bound_args[name] = value.annotation(params[name].get_value())
-        else:
-            bound_args[name] = params[name].get_value()
-
-        unassigned_params.remove(name)
-
-    params = {
-        key: params[key] for key in unassigned_params
-    }  # remove keys from params if they are assigned
-
-    return bound_args
-
-
-def bind_args_for_pydantic_model(
-    params: Dict[str, Any], model: Type[BaseModel]
-) -> BaseModel:
-    class EasyModel(model):  # type: ignore
-        model_config = ConfigDict(extra="ignore")
-
-    swallow_all = EasyModel(**params)
-    bound_model = model(**swallow_all.model_dump())
-    return bound_model