runnable 0.14.0__py3-none-any.whl → 0.17.0__py3-none-any.whl
- runnable/__init__.py +1 -1
- runnable/catalog.py +2 -0
- runnable/cli.py +264 -307
- runnable/context.py +12 -3
- runnable/datastore.py +159 -25
- runnable/defaults.py +13 -54
- runnable/entrypoints.py +197 -185
- runnable/exceptions.py +22 -0
- runnable/executor.py +114 -88
- runnable/graph.py +0 -1
- runnable/nodes.py +36 -6
- runnable/sdk.py +132 -36
- runnable/tasks.py +6 -15
- runnable/utils.py +22 -30
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/METADATA +6 -3
- runnable-0.17.0.dist-info/RECORD +23 -0
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/entry_points.txt +12 -7
- runnable/integration.py +0 -197
- runnable-0.14.0.dist-info/RECORD +0 -24
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/WHEEL +0 -0
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/licenses/LICENSE +0 -0
runnable/entrypoints.py
CHANGED
@@ -9,12 +9,16 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
 from rich.table import Column

 import runnable.context as context
-from runnable import console, defaults, graph, task_console, utils
+from runnable import console, defaults, graph, task_console, tasks, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
+from runnable.executor import BaseJobExecutor, BasePipelineExecutor

 logger = logging.getLogger(defaults.LOGGER_NAME)


+print("") # removes the buffer print
+
+
 def get_default_configs() -> RunnableConfig:
     """
     User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
@@ -29,10 +33,9 @@ def get_default_configs() -> RunnableConfig:
 def prepare_configurations(
     run_id: str,
     configuration_file: str = "",
-    pipeline_file: str = "",
     tag: str = "",
     parameters_file: str = "",
-
+    is_job: bool = False,
 ) -> context.Context:
     """
     Sets up everything needed
@@ -42,7 +45,6 @@ def prepare_configurations(

     Args:
         variables_file (str): The variables file, if used or None
-        pipeline_file (str): The config/dag file
         run_id (str): The run id of the run.
         tag (str): If a tag is provided at the run time

@@ -59,22 +61,26 @@ def prepare_configurations(
     )

     if configuration_file:
-        templated_configuration = utils.load_yaml(configuration_file)
+        templated_configuration = utils.load_yaml(configuration_file)
+
+        # apply variables
+        configuration = cast(
+            RunnableConfig, utils.apply_variables(templated_configuration, variables)
+        )

     # Since all the services (run_log_store, catalog, secrets, executor) are
     # dynamically loaded via stevedore, we cannot validate the configuration
     # before they are passed to the service.

-    configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
-
     logger.info(f"Resolved configurations: {configuration}")

     # Run log settings, configuration over-rides everything
-
+    # The user config has run-log-store while internally we use run_log_store
+    run_log_config: Optional[ServiceConfig] = configuration.get("run-log-store", None) # type: ignore
     if not run_log_config:
         run_log_config = cast(
             ServiceConfig,
-            runnable_defaults.get("
+            runnable_defaults.get("run-log-store", defaults.DEFAULT_RUN_LOG_STORE),
         )
     run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config)

@@ -100,18 +106,37 @@ def prepare_configurations(
     )
     pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)

-
-
-
-
+    if not is_job:
+        # executor configurations, configuration over rides everything
+        executor_config: Optional[ServiceConfig] = configuration.get(
+            "pipeline-executor", None
+        ) # type: ignore
+        # as pipeline-executor is not a valid key
+        if not executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get(
+                    "pipeline-executor", defaults.DEFAULT_PIPELINE_EXECUTOR
+                ),
+            )
+        configured_executor = utils.get_provider_by_name_and_type(
+            "pipeline_executor", executor_config
+        )
+    else:
+        # executor configurations, configuration over rides everything
+        job_executor_config: Optional[ServiceConfig] = configuration.get(
+            "job-executor", None
+        ) # type: ignore
+        if not job_executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get("job-executor", defaults.DEFAULT_JOB_EXECUTOR),
+            )

-
-
-
+        assert job_executor_config, "Job executor is not provided"
+        configured_executor = utils.get_provider_by_name_and_type(
+            "job_executor", job_executor_config
         )
-    configured_executor = utils.get_provider_by_name_and_type(
-        "executor", executor_config
-    )

     # Construct the context
     run_context = context.Context(
@@ -127,38 +152,45 @@ def prepare_configurations(
         parameters_file=parameters_file,
     )

-
-    if pipeline_file.endswith(".py"):
-        # converting a pipeline defined in python to a dag in yaml
-        module_file = pipeline_file.strip(".py")
-        module, func = utils.get_module_and_attr_names(module_file)
-        sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
-        imported_module = importlib.import_module(module)
+    context.run_context = run_context

-
-        dag = getattr(imported_module, func)().return_dag()
+    return run_context

-    else:
-        pipeline_config = utils.load_yaml(pipeline_file)

-
-
+def set_pipeline_spec_from_yaml(run_context: context.Context, pipeline_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    pipeline_config = utils.load_yaml(pipeline_file)
+    logger.info("The input pipeline:")
+    logger.info(json.dumps(pipeline_config, indent=4))

-
+    dag_config = pipeline_config["dag"]

-
-
-
+    dag_hash = utils.get_dag_hash(dag_config)
+    dag = graph.create_graph(dag_config)
+    run_context.dag_hash = dag_hash

-
-
+    run_context.pipeline_file = pipeline_file
+    run_context.dag = dag

-    context.run_context = run_context

-
+def set_pipeline_spec_from_python(run_context: context.Context, python_module: str):
+    # Call the SDK to get the dag
+    # Import the module and call the function to get the dag
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
+    imported_module = importlib.import_module(module)
+
+    run_context.from_sdk = True
+    dag = getattr(imported_module, func)().return_dag()
+
+    run_context.pipeline_file = python_module
+    run_context.dag = dag


-def
+def execute_pipeline_yaml_spec(
     pipeline_file: str,
     configuration_file: str = "",
     tag: str = "",
@@ -167,39 +199,35 @@ def execute(
 ):
     # pylint: disable=R0914,R0913
     """
-    The entry point to runnable execution
-
-
-
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    The entry point to runnable execution for any YAML based spec.
+    The result could:
+    - Execution of the pipeline if its local executor
+    - Rendering of the spec in the case of non local executor
     """
     run_id = utils.generate_run_id(run_id=run_id)

     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )

-
-    console.print(run_context)
-    console.rule(style="[dark orange]")
+    assert isinstance(run_context.executor, BasePipelineExecutor)

+    set_pipeline_spec_from_yaml(run_context, pipeline_file)
     executor = run_context.executor

-    run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-
     utils.set_runnable_environment_variables(
         run_id=run_id, configuration_file=configuration_file, tag=tag
     )

     # Prepare for graph execution
-    executor.
+    executor._set_up_run_log(exists_ok=False)
+
+    console.print("Working with context:")
+    console.print(run_context)
+    console.rule(style="[dark orange]")

     logger.info(f"Executing the graph: {run_context.dag}")
     with Progress(
@@ -218,8 +246,8 @@ def execute(
         run_context.progress = progress
         executor.execute_graph(dag=run_context.dag) # type: ignore

-
-
+    if not executor._is_local:
+        # Non local executors only traverse the graph and do not execute the nodes
         executor.send_return_code(stage="traversal")
         return

@@ -259,24 +287,18 @@ def execute_single_node(
     pipeline_file: str,
     step_name: str,
     map_variable: str,
+    mode: str,
     run_id: str,
     tag: str = "",
     parameters_file: str = "",
 ):
     """
-
-
-
-    It should have similar set up of configurations to execute because orchestrator modes can initiate the execution.
-
-    Args:
-        variables_file (str): The variables file, if used or None
-        step_name : The name of the step to execute in dot path convention
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    This entry point is triggered during the execution of the pipeline
+    - non local execution environments

+    The mode defines how the pipeline spec is provided to the runnable
+    - yaml
+    - python
     """
     from runnable import nodes

@@ -290,30 +312,30 @@ def execute_single_node(

     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
+    assert run_context.dag
+
     task_console.print("Working with context:")
     task_console.print(run_context)
     task_console.rule(style="[dark orange]")

     executor = run_context.executor
-    run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
     utils.set_runnable_environment_variables(
         run_id=run_id, configuration_file=configuration_file, tag=tag
     )

-    executor.prepare_for_node_execution()
-
-    # TODO: may be make its own entry point
-    # if not run_context.dag:
-    #     # There are a few entry points that make graph dynamically and do not have a dag defined statically.
-    #     run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_id, full=False)
-    #     run_context.dag = graph.create_graph(run_log.run_config["pipeline"])
-    assert run_context.dag
-
     map_variable_dict = utils.json_to_ordered_dict(map_variable)

     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
@@ -322,7 +344,7 @@ def execute_single_node(
     )

     logger.info("Executing the single node of : %s", node_to_execute)
-    ## This step is where we save the
+    ## This step is where we save output of the function/shell command
     try:
         executor.execute_node(node=node_to_execute, map_variable=map_variable_dict)
     finally:
@@ -336,23 +358,15 @@ def execute_single_node(
         run_context.catalog_handler.put(name=log_file_name, run_id=run_context.run_id)
         os.remove(log_file_name)

-    # executor.send_return_code(stage="execution")
-

-def
-
-
-    catalog_config: dict,
-    configuration_file: str,
-    notebook_output_path: str = "",
+def execute_job_yaml_spec(
+    job_definition_file: str,
+    configuration_file: str = "",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-
-    The entry point to runnable execution of a notebook. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
+    # A job and task are internally the same.
     run_id = utils.generate_run_id(run_id=run_id)

     run_context = prepare_configurations(
@@ -360,71 +374,87 @@ def execute_notebook(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )

+    assert isinstance(run_context.executor, BaseJobExecutor)
+
     executor = run_context.executor
-    run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
     utils.set_runnable_environment_variables(
         run_id=run_id, configuration_file=configuration_file, tag=tag
     )

+    run_context.job_definition_file = job_definition_file
+
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info(
+        "Executing the job from the user."
+        f"job definition: {job_definition_file}, config: {job_config}"
+    )
+    assert job_config.get("type"), "Job type is not provided"
+
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")

-
-
-        "command_type": "notebook",
-        "notebook_output_path": notebook_output_path,
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
-
-    if entrypoint == defaults.ENTRYPOINT.USER.value:
-        # Prepare for graph execution
-        executor.prepare_for_graph_execution()
-
-        logger.info(
-            "Executing the job from the user. We are still in the caller's compute environment"
-        )
-        executor.execute_job(node=node)
+    # A hack where we create a task node and get our job/catalog settings
+    catalog_config: list[str] = job_config.pop("catalog", {})

-
-
-
-            "Executing the job from the system. We are in the config's compute environment"
-        )
-        executor.execute_node(node=node)
+    # rename the type to command_type of task
+    job_config["command_type"] = job_config.pop("type")
+    job = tasks.create_task(job_config)

-
-
-
-    )
-    run_context.run_log_store.update_run_log_status(
-        run_id=run_id, status=step_log.status
-    )
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )

-
-
+    assert isinstance(executor, BaseJobExecutor)
+    executor.submit_job(job, catalog_settings=catalog_config)

     executor.send_return_code()


-def
-
-
-
-
+def set_job_spec_from_yaml(run_context: context.Context, job_definition_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info("The input job definition file:")
+    logger.info(json.dumps(job_config, indent=4))
+
+    catalog_config: list[str] = job_config.pop("catalog", {})
+
+    job_config["command_type"] = job_config.pop("type")
+
+    run_context.job_definition_file = job_definition_file
+    run_context.job = tasks.create_task(job_config)
+    run_context.job_catalog_settings = catalog_config
+
+
+def set_job_spec_from_python(run_context: context.Context, python_module: str):
+    # Import the module and call the function to get the task
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
+    imported_module = importlib.import_module(module)
+
+    run_context.from_sdk = True
+    task = getattr(imported_module, func)().return_task()
+    catalog_settings = getattr(imported_module, func)().return_catalog_settings()
+
+    run_context.job_definition_file = python_module
+    run_context.job = task
+    run_context.job_catalog_settings = catalog_settings
+
+
+def execute_job_non_local(
+    job_definition_file: str,
+    configuration_file: str = "",
+    mode: str = "yaml",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-    """
-    The entry point to runnable execution of a function. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
     run_id = utils.generate_run_id(run_id=run_id)

     run_context = prepare_configurations(
@@ -432,57 +462,33 @@ def execute_function(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )

-
+    assert isinstance(run_context.executor, BaseJobExecutor)

-
-
-
-
+    if mode == "yaml":
+        # Load the yaml file
+        set_job_spec_from_yaml(run_context, job_definition_file)
+    elif mode == "python":
+        # Call the SDK to get the task
+        set_job_spec_from_python(run_context, job_definition_file)
+
+    assert run_context.job

     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")

-
-
-
-        "command_type": "python",
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
-
-    if entrypoint == defaults.ENTRYPOINT.USER.value:
-        # Prepare for graph execution
-        executor.prepare_for_graph_execution()
-
-        logger.info(
-            "Executing the job from the user. We are still in the caller's compute environment"
-        )
-        executor.execute_job(node=node)
-
-    elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value:
-        executor.prepare_for_node_execution()
-        logger.info(
-            "Executing the job from the system. We are in the config's compute environment"
-        )
-        executor.execute_node(node=node)
-
-    # Update the status of the run log
-    step_log = run_context.run_log_store.get_step_log(
-        node._get_step_log_name(), run_id
-    )
-    run_context.run_log_store.update_run_log_status(
-        run_id=run_id, status=step_log.status
-    )
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )

-
-
+    run_context.executor.execute_job(
+        run_context.job, catalog_settings=run_context.job_catalog_settings
+    )

-    executor.send_return_code()
+    run_context.executor.send_return_code()


 def fan(
@@ -518,23 +524,29 @@ def fan(

     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
+
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")

     executor = run_context.executor
-    run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
     utils.set_runnable_environment_variables(
         run_id=run_id, configuration_file=configuration_file, tag=tag
     )

-    executor.prepare_for_node_execution()
-
     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
     node_to_execute, _ = graph.search_node_by_internal_name(
         run_context.dag, # type: ignore
@@ -553,6 +565,6 @@ def fan(
         raise ValueError(f"Invalid mode {mode}")


-if __name__ == "__main__":
-
-
+# if __name__ == "__main__":
+#     # This is only for perf testing purposes.
+#     prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")
runnable/exceptions.py
CHANGED
@@ -10,6 +10,18 @@ class RunLogExistsError(Exception): # pragma: no cover
         self.message = f"Run id for {run_id} is already found in the datastore"


+class JobLogNotFoundError(Exception):
+    """
+    Exception class
+    Args:
+        Exception ([type]): [description]
+    """
+
+    def __init__(self, run_id):
+        super().__init__()
+        self.message = f"Job for {run_id} is not found in the datastore"
+
+
 class RunLogNotFoundError(Exception): # pragma: no cover
     """
     Exception class
@@ -74,6 +86,16 @@ class BranchNotFoundError(Exception): # pragma: no cover
         self.message = f"Branch of name {name} is not found the graph"


+class NodeMethodCallError(Exception):
+    """
+    Exception class
+    """
+
+    def __init__(self, message):
+        super().__init__()
+        self.message = message
+
+
 class TerminalNodeError(Exception): # pragma: no cover
     def __init__(self):
         super().__init__()