runnable-0.35.0-py3-none-any.whl → runnable-0.36.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (42)
  1. extensions/job_executor/__init__.py +3 -4
  2. extensions/job_executor/emulate.py +106 -0
  3. extensions/job_executor/k8s.py +8 -8
  4. extensions/job_executor/local_container.py +13 -14
  5. extensions/nodes/__init__.py +0 -0
  6. extensions/nodes/conditional.py +7 -5
  7. extensions/nodes/fail.py +72 -0
  8. extensions/nodes/map.py +350 -0
  9. extensions/nodes/parallel.py +159 -0
  10. extensions/nodes/stub.py +89 -0
  11. extensions/nodes/success.py +72 -0
  12. extensions/nodes/task.py +92 -0
  13. extensions/pipeline_executor/__init__.py +24 -26
  14. extensions/pipeline_executor/argo.py +18 -15
  15. extensions/pipeline_executor/emulate.py +112 -0
  16. extensions/pipeline_executor/local.py +4 -4
  17. extensions/pipeline_executor/local_container.py +19 -79
  18. extensions/pipeline_executor/mocked.py +4 -4
  19. extensions/pipeline_executor/retry.py +6 -10
  20. extensions/tasks/torch.py +1 -1
  21. runnable/__init__.py +0 -8
  22. runnable/catalog.py +1 -21
  23. runnable/cli.py +0 -59
  24. runnable/context.py +519 -28
  25. runnable/datastore.py +51 -54
  26. runnable/defaults.py +12 -34
  27. runnable/entrypoints.py +82 -440
  28. runnable/exceptions.py +35 -34
  29. runnable/executor.py +13 -20
  30. runnable/names.py +1 -1
  31. runnable/nodes.py +16 -15
  32. runnable/parameters.py +2 -2
  33. runnable/sdk.py +66 -163
  34. runnable/tasks.py +62 -21
  35. runnable/utils.py +6 -268
  36. {runnable-0.35.0.dist-info → runnable-0.36.0.dist-info}/METADATA +1 -1
  37. runnable-0.36.0.dist-info/RECORD +74 -0
  38. {runnable-0.35.0.dist-info → runnable-0.36.0.dist-info}/entry_points.txt +8 -7
  39. extensions/nodes/nodes.py +0 -778
  40. runnable-0.35.0.dist-info/RECORD +0 -66
  41. {runnable-0.35.0.dist-info → runnable-0.36.0.dist-info}/WHEEL +0 -0
  42. {runnable-0.35.0.dist-info → runnable-0.36.0.dist-info}/licenses/LICENSE +0 -0
extensions/job_executor/__init__.py
@@ -84,7 +84,6 @@ class GenericJobExecutor(BaseJobExecutor):
             run_id=self._context.run_id,
             tag=self._context.tag,
             status=defaults.PROCESSING,
-            dag_hash=self._context.dag_hash,
         )
         # Any interaction with run log store attributes should happen via API if available.
         self._context.run_log_store.set_parameters(
@@ -92,7 +91,7 @@ class GenericJobExecutor(BaseJobExecutor):
         )

         # Update run_config
-        run_config = utils.get_run_config()
+        run_config = self._context.model_dump()
         logger.debug(f"run_config as seen by executor: {run_config}")
         self._context.run_log_store.set_run_config(
             run_id=self._context.run_id, run_config=run_config
@@ -147,7 +146,7 @@ class GenericJobExecutor(BaseJobExecutor):

         data_catalogs = []
         for name_pattern in catalog_settings:
-            data_catalog = self._context.catalog_handler.put(
+            data_catalog = self._context.catalog.put(
                 name=name_pattern, allow_file_not_found_exc=allow_file_not_found_exc
             )

@@ -165,5 +164,5 @@ class GenericJobExecutor(BaseJobExecutor):
         )
         task_console.save_text(log_file_name)
         # Put the log file in the catalog
-        self._context.catalog_handler.put(name=log_file_name)
+        self._context.catalog.put(name=log_file_name)
         os.remove(log_file_name)
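
Note on the run_config change above: utils.get_run_config() is dropped in favour of serialising the executor's context directly. Assuming the context object is a Pydantic v2 model (the call to model_dump() implies as much), model_dump() returns the model as a plain dict that can be handed to the run log store. A minimal, self-contained sketch with made-up field names:

    # Illustration only: RunConfig and its fields are hypothetical,
    # but model_dump() itself is standard Pydantic v2 API.
    from pydantic import BaseModel

    class RunConfig(BaseModel):
        run_id: str
        tag: str = ""

    config = RunConfig(run_id="demo-run", tag="experiment")
    print(config.model_dump())  # {'run_id': 'demo-run', 'tag': 'experiment'}
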
extensions/job_executor/emulate.py (new file)
@@ -0,0 +1,106 @@
+import logging
+import shlex
+import subprocess
+import sys
+from typing import List, Optional
+
+
+from extensions.job_executor import GenericJobExecutor
+from runnable import console, context, defaults
+from runnable.datastore import DataCatalog
+from runnable.tasks import BaseTaskType
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class EmulatorJobExecutor(GenericJobExecutor):
+    """
+    The EmulatorJobExecutor is a job executor that emulates the job execution.
+    """
+
+    service_name: str = "emulator"
+
+    def submit_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
+        """
+        This method gets invoked by the CLI.
+        """
+        self._set_up_run_log()
+
+        # Call the container job
+        job_log = self._context.run_log_store.create_job_log()
+        self._context.run_log_store.add_job_log(
+            run_id=self._context.run_id, job_log=job_log
+        )
+        self.run_click_command()
+
+    def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
+        """
+        Focusses on execution of the job.
+        """
+        logger.info("Trying to execute job")
+
+        job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)
+        self.add_code_identities(job_log)
+
+        attempt_log = job.execute_command()
+
+        job_log.status = attempt_log.status
+        job_log.attempts.append(attempt_log)
+
+        allow_file_not_found_exc = True
+        if job_log.status == defaults.SUCCESS:
+            allow_file_not_found_exc = False
+
+        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
+            catalog_settings=catalog_settings,
+            allow_file_not_found_exc=allow_file_not_found_exc,
+        )
+
+        logger.debug(f"data_catalogs_put: {data_catalogs_put}")
+
+        job_log.add_data_catalogs(data_catalogs_put or [])
+
+        console.print("Summary of job")
+        console.print(job_log.get_summary())
+
+        self._context.run_log_store.add_job_log(
+            run_id=self._context.run_id, job_log=job_log
+        )
+
+    def run_click_command(self) -> str:
+        """
+        Execute a Click-based CLI command in the current virtual environment.
+
+        Args:
+            command: Click command to execute
+        """
+        assert isinstance(self._context, context.JobContext)
+        command = self._context.get_job_callable_command()
+
+        sub_command = [sys.executable, "-m", "runnable.cli"] + shlex.split(command)[1:]
+
+        process = subprocess.Popen(
+            sub_command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            universal_newlines=True,
+            bufsize=1,
+        )
+
+        output = []
+        try:
+            while True:
+                line = process.stdout.readline()  # type: ignore
+                if not line and process.poll() is not None:
+                    break
+                print(line, end="")
+                output.append(line)
+        finally:
+            process.stdout.close()  # type: ignore
+
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(
+                process.returncode, command, "".join(output)
+            )
+
+        return "".join(output)
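
The run_click_command added above re-invokes the runnable CLI in a subprocess and streams its output line by line while also capturing it for the return value. The same standard-library pattern, stripped of runnable-specific context, looks roughly like this (the child command is purely illustrative):

    # Sketch of the line-buffered streaming pattern; not runnable's API.
    import subprocess
    import sys

    def stream_and_capture(cmd: list) -> str:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,   # interleave stderr with stdout
            universal_newlines=True,    # decode bytes to text
            bufsize=1,                  # line buffered
        )
        output = []
        try:
            while True:
                line = process.stdout.readline()
                if not line and process.poll() is not None:
                    break
                print(line, end="")     # echo each line as it arrives
                output.append(line)
        finally:
            process.stdout.close()
        if process.wait() != 0:
            raise subprocess.CalledProcessError(process.returncode, cmd, "".join(output))
        return "".join(output)

    stream_and_capture([sys.executable, "-c", "print('hello from a child process')"])
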
extensions/job_executor/k8s.py
@@ -9,7 +9,7 @@ from pydantic import BaseModel, ConfigDict, Field, PlainSerializer, PrivateAttr
 from pydantic.alias_generators import to_camel

 from extensions.job_executor import GenericJobExecutor
-from runnable import console, defaults, utils
+from runnable import console, context, defaults
 from runnable.datastore import DataCatalog, StepAttempt
 from runnable.tasks import BaseTaskType

@@ -173,7 +173,7 @@ class GenericK8sJobExecutor(GenericJobExecutor):
     mock: bool = False
     namespace: str = Field(default="default")

-    _is_local: bool = PrivateAttr(default=False)
+    _should_setup_run_log_at_traversal: bool = PrivateAttr(default=False)
     _volume_mounts: list[VolumeMount] = PrivateAttr(default_factory=lambda: [])
     _volumes: list[HostPathVolume | PVCVolume] = PrivateAttr(default_factory=lambda: [])

@@ -258,8 +258,8 @@ class GenericK8sJobExecutor(GenericJobExecutor):
             self._client.V1VolumeMount(**vol.model_dump())
             for vol in self._volume_mounts
         ]
-
-        command = utils.get_job_execution_command()
+        assert isinstance(self._context, context.JobContext)
+        command = self._context.get_job_callable_command()

         container_env = [
             self._client.V1EnvVar(**env.model_dump())
@@ -355,9 +355,9 @@ class GenericK8sJobExecutor(GenericJobExecutor):
             case "chunked-fs":
                 self._context.run_log_store.log_folder = self._container_log_location

-        match self._context.catalog_handler.service_name:
+        match self._context.catalog.service_name:
             case "file-system":
-                self._context.catalog_handler.catalog_location = (
+                self._context.catalog.catalog_location = (
                     self._container_catalog_location
                 )

@@ -415,7 +415,7 @@ class MiniK8sJobExecutor(GenericK8sJobExecutor):
             )
         )

-        match self._context.catalog_handler.service_name:
+        match self._context.catalog.service_name:
             case "file-system":
                 self._volumes.append(
                     HostPathVolume(
@@ -503,7 +503,7 @@ class K8sJobExecutor(GenericK8sJobExecutor):
             )
         )

-        match self._context.catalog_handler.service_name:
+        match self._context.catalog.service_name:
             case "file-system":
                 self._volume_mounts.append(
                     VolumeMount(
extensions/job_executor/local_container.py
@@ -2,10 +2,10 @@ import logging
 from pathlib import Path
 from typing import Dict, List, Optional

-from pydantic import Field
+from pydantic import Field, PrivateAttr

 from extensions.job_executor import GenericJobExecutor
-from runnable import console, defaults, utils
+from runnable import console, context, defaults
 from runnable.datastore import DataCatalog, StepAttempt
 from runnable.tasks import BaseTaskType

@@ -23,7 +23,7 @@ class LocalContainerJobExecutor(GenericJobExecutor):
     auto_remove_container: bool = True
     environment: Dict[str, str] = Field(default_factory=dict)

-    _is_local: bool = False
+    _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)

     _container_log_location = "/tmp/run_logs/"
     _container_catalog_location = "/tmp/catalog/"
@@ -100,7 +100,8 @@ class LocalContainerJobExecutor(GenericJobExecutor):
            ) from ex

        try:
-            command = utils.get_job_execution_command()
+            assert isinstance(self._context, context.JobContext)
+            command = self._context.get_job_callable_command()
            logger.info(f"Running the command {command}")
            print(command)

@@ -165,17 +166,17 @@ class LocalContainerJobExecutor(GenericJobExecutor):
                "mode": "rw",
            }

-        match self._context.catalog_handler.service_name:
+        match self._context.catalog.service_name:
            case "file-system":
-                catalog_location = self._context.catalog_handler.catalog_location
+                catalog_location = self._context.catalog.catalog_location
                self._volumes[str(Path(catalog_location).resolve())] = {
                    "bind": f"{self._container_catalog_location}",
                    "mode": "rw",
                }

-        match self._context.secrets_handler.service_name:
+        match self._context.secrets.service_name:
            case "dotenv":
-                secrets_location = self._context.secrets_handler.location
+                secrets_location = self._context.secrets.location
                self._volumes[str(Path(secrets_location).resolve())] = {
                    "bind": f"{self._container_secrets_location}",
                    "mode": "ro",
@@ -188,14 +189,12 @@ class LocalContainerJobExecutor(GenericJobExecutor):
            case "chunked-fs":
                self._context.run_log_store.log_folder = self._container_log_location

-        match self._context.catalog_handler.service_name:
+        match self._context.catalog.service_name:
            case "file-system":
-                self._context.catalog_handler.catalog_location = (
+                self._context.catalog.catalog_location = (
                    self._container_catalog_location
                )

-        match self._context.secrets_handler.service_name:
+        match self._context.secrets.service_name:
            case "dotenv":
-                self._context.secrets_handler.location = (
-                    self._container_secrets_location
-                )
+                self._context.secrets.location = self._container_secrets_location
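
Note on the _is_local change above: the plain class attribute `_is_local: bool = False` becomes a Pydantic PrivateAttr, which is how Pydantic v2 expects underscore-prefixed, non-field state on a model to be declared; private attributes stay out of validation and serialisation. A minimal illustration (the class and field names are invented for the example):

    from pydantic import BaseModel, PrivateAttr

    class Executor(BaseModel):
        auto_remove_container: bool = True
        # Private attribute: not a model field, excluded from model_dump().
        _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)

    e = Executor()
    print(e.model_dump())                        # {'auto_remove_container': True}
    print(e._should_setup_run_log_at_traversal)  # True
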
extensions/nodes/__init__.py
File without changes
extensions/nodes/conditional.py
@@ -7,7 +7,7 @@ from pydantic import Field, field_serializer, field_validator
 from runnable import console, defaults
 from runnable.datastore import Parameter
 from runnable.graph import Graph, create_graph
-from runnable.nodes import CompositeNode, TypeMapVariable
+from runnable.nodes import CompositeNode, MapVariableType

 logger = logging.getLogger(defaults.LOGGER_NAME)

@@ -124,7 +124,7 @@ class ConditionalNode(CompositeNode):

        raise Exception(f"Branch {branch_name} does not exist")

-    def fan_out(self, map_variable: TypeMapVariable = None):
+    def fan_out(self, map_variable: MapVariableType = None):
        """
        This method is restricted to creating branch logs.
        """
@@ -160,7 +160,7 @@ class ConditionalNode(CompositeNode):
            "None of the branches were true. Please check your evaluate statements"
        )

-    def execute_as_graph(self, map_variable: TypeMapVariable = None):
+    def execute_as_graph(self, map_variable: MapVariableType = None):
        """
        This function does the actual execution of the sub-branches of the parallel node.

@@ -190,11 +190,13 @@ class ConditionalNode(CompositeNode):
            if result:
                # if the condition is met, execute the graph
                logger.debug(f"Executing graph for {branch}")
-                self._context.executor.execute_graph(branch, map_variable=map_variable)
+                self._context.pipeline_executor.execute_graph(
+                    branch, map_variable=map_variable
+                )

        self.fan_in(map_variable=map_variable)

-    def fan_in(self, map_variable: TypeMapVariable = None):
+    def fan_in(self, map_variable: MapVariableType = None):
        """
        The general fan in method for a node of type Parallel.

extensions/nodes/fail.py (new file)
@@ -0,0 +1,72 @@
+from datetime import datetime
+from typing import Any, Dict, cast
+
+from pydantic import Field
+
+from runnable import datastore, defaults
+from runnable.datastore import StepLog
+from runnable.defaults import MapVariableType
+from runnable.nodes import TerminalNode
+
+
+class FailNode(TerminalNode):
+    """
+    A leaf node of the graph that represents a failure node
+    """
+
+    node_type: str = Field(default="fail", serialization_alias="type")
+
+    @classmethod
+    def parse_from_config(cls, config: Dict[str, Any]) -> "FailNode":
+        return cast("FailNode", super().parse_from_config(config))
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "name": self.name,
+            "type": self.node_type,
+        }
+
+        return summary
+
+    def execute(
+        self,
+        mock=False,
+        map_variable: MapVariableType = None,
+        attempt_number: int = 1,
+    ) -> StepLog:
+        """
+        Execute the failure node.
+        Set the run or branch log status to failure.
+
+        Args:
+            executor (_type_): the executor class
+            mock (bool, optional): If we should just mock and not do the actual execution. Defaults to False.
+            map_variable (dict, optional): If the node belongs to internal branches. Defaults to None.
+
+        Returns:
+            StepAttempt: The step attempt object
+        """
+        step_log = self._context.run_log_store.get_step_log(
+            self._get_step_log_name(map_variable), self._context.run_id
+        )
+
+        attempt_log = datastore.StepAttempt(
+            status=defaults.SUCCESS,
+            start_time=str(datetime.now()),
+            end_time=str(datetime.now()),
+            attempt_number=attempt_number,
+        )
+
+        run_or_branch_log = self._context.run_log_store.get_branch_log(
+            self._get_branch_log_name(map_variable), self._context.run_id
+        )
+        run_or_branch_log.status = defaults.FAIL
+        self._context.run_log_store.add_branch_log(
+            run_or_branch_log, self._context.run_id
+        )
+
+        step_log.status = attempt_log.status
+
+        step_log.attempts.append(attempt_log)
+
+        return step_log