runnable 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +686 -0
  6. runnable/defaults.py +179 -0
  7. runnable/entrypoints.py +484 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +431 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +226 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +714 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1182 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +69 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +367 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +220 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +675 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +399 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +489 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +470 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +392 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.2.0.dist-info/METADATA +437 -0
  65. runnable-0.2.0.dist-info/RECORD +69 -0
  66. runnable-0.2.0.dist-info/entry_points.txt +44 -0
  67. runnable-0.1.0.dist-info/METADATA +0 -16
  68. runnable-0.1.0.dist-info/RECORD +0 -6
  69. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  70. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/LICENSE +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,367 @@
+ import logging
+ from pathlib import Path
+ from typing import Dict, cast
+
+ from pydantic import Field
+ from rich import print
+
+ from runnable import defaults, integration, utils
+ from runnable.datastore import StepLog
+ from runnable.defaults import TypeMapVariable
+ from runnable.extensions.executor import GenericExecutor
+ from runnable.extensions.nodes import TaskNode
+ from runnable.integration import BaseIntegration
+ from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class LocalContainerExecutor(GenericExecutor):
+     """
+     In local-container mode, we execute all the commands in a container.
+
+     Ensure that the local compute has enough resources to finish all your jobs.
+
+     The image for the run can either be given as a default in the configuration of the execution engine,
+     i.e.:
+     execution:
+       type: 'local-container'
+       config:
+         docker_image: the image you want the code to run in.
+
+     or the default image can be overridden for a single node by providing a docker_image in the step config,
+     i.e.:
+     dag:
+       steps:
+         step:
+           executor_config:
+             local-container:
+               docker_image: The image that you want that single step to run in.
+     The step-level image is used for that step only.
+
+     This mode does not build the docker image with the latest code for you; it is left to the user to build
+     and ensure that the docker image provided is the correct one.
+
+     Example config:
+     execution:
+       type: local-container
+       config:
+         docker_image: The default docker image to use if the node does not provide one.
+     """
+
+     service_name: str = "local-container"
+     docker_image: str
+     auto_remove_container: bool = True
+     run_in_local: bool = False
+     environment: Dict[str, str] = Field(default_factory=dict)
+
+     _container_log_location = "/tmp/run_logs/"
+     _container_catalog_location = "/tmp/catalog/"
+     _container_secrets_location = "/tmp/dotenv"
+     _volumes: Dict[str, Dict[str, str]] = {}
+
+     def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
+         """
+         Call the base class to add the git code identity, then add the docker identity.
+
+         Args:
+             node (BaseNode): The node we are adding the code identity for
+             step_log (StepLog): The step log corresponding to the node
+         """
+         super().add_code_identities(node, step_log)
+
+         if node.node_type in ["success", "fail"]:
+             # No need to add code identities for success or fail nodes
+             return
+
+         executor_config = self._resolve_executor_config(node)
+
+         docker_image = executor_config.get("docker_image", None)
+         if docker_image:
+             code_id = self._context.run_log_store.create_code_identity()
+
+             code_id.code_identifier = utils.get_local_docker_image_id(docker_image)
+             code_id.code_identifier_type = "docker"
+             code_id.code_identifier_dependable = True
+             code_id.code_identifier_url = "local docker host"
+             step_log.code_identities.append(code_id)
+
+     def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         We are already inside the container; we just execute the node.
+         The node is already prepared for execution.
+         """
+         return self._execute_node(node, map_variable, **kwargs)
+
+     def execute_job(self, node: TaskNode):
+         """
+         Set up the step log and call execute_node.
+
+         Args:
+             node (TaskNode): The job node to execute
+         """
+         step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable=None))
+
+         self.add_code_identities(node=node, step_log=step_log)
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+         command = utils.get_job_execution_command(node)
+         self._spin_container(node=node, command=command)
+
+         # Check the step log status and warn if necessary. Docker errors are generally suppressed.
+         step_log = self._context.run_log_store.get_step_log(
+             node._get_step_log_name(map_variable=None), self._context.run_id
+         )
+         if step_log.status != defaults.SUCCESS:
+             msg = (
+                 "Node execution inside the container failed. Please check the logs.\n"
+                 "Note: If you do not see any docker issue from your side and the code works properly on local execution, "
+                 "please raise a bug report."
+             )
+             logger.warning(msg)
+
+     def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         We come into this step via execute_from_graph; use trigger_job to spin up the container.
+
+         If the config has "run_in_local: True", we compute it on the local system instead of in a container.
+         In local container execution, we just spin up the container to run magnus execute_single_node.
+
+         Args:
+             node (BaseNode): The node we are currently executing
+             map_variable (dict, optional): If the node is part of a map branch. Defaults to None.
+         """
+         executor_config = self._resolve_executor_config(node)
+         auto_remove_container = executor_config.get("auto_remove_container", True)
+
+         logger.debug("Here is the resolved executor config")
+         logger.debug(executor_config)
+
+         if executor_config.get("run_in_local", False):
+             # Do not change the config; only validate it.
+             # Trigger the job on the local system instead of in a container.
+             integration.validate(self, self._context.run_log_store)
+             integration.validate(self, self._context.catalog_handler)
+             integration.validate(self, self._context.secrets_handler)
+
+             self.execute_node(node=node, map_variable=map_variable, **kwargs)
+             return
+
+         command = utils.get_node_execution_command(node, map_variable=map_variable)
+
+         self._spin_container(
+             node=node,
+             command=command,
+             map_variable=map_variable,
+             auto_remove_container=auto_remove_container,
+             **kwargs,
+         )
+
+         step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id)
+         if step_log.status != defaults.SUCCESS:
+             msg = (
+                 "Node execution inside the container failed. Please check the logs.\n"
+                 "Note: If you do not see any docker issue from your side and the code works properly on local execution, "
+                 "please raise a bug report."
+             )
+             logger.warning(msg)
+             step_log.status = defaults.FAIL
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+     def _spin_container(
+         self,
+         node: BaseNode,
+         command: str,
+         map_variable: TypeMapVariable = None,
+         auto_remove_container: bool = True,
+         **kwargs,
+     ):
+         """
+         During the flow run, we have to spin up a container with the docker image mentioned
+         and the right log locations.
+         """
+         # Conditional import
+         import docker  # pylint: disable=C0415
+
+         try:
+             client = docker.from_env()
+             api_client = docker.APIClient()
+         except Exception as ex:
+             logger.exception("Could not get access to docker")
+             raise Exception("Could not get the docker socket file, do you have docker installed?") from ex
+
+         try:
+             logger.info(f"Running the command {command}")
+             # Overrides the global config with the local one
+             executor_config = self._resolve_executor_config(node)
+
+             docker_image = executor_config.get("docker_image", None)
+             environment = executor_config.get("environment", {})
+             environment.update(self._context.variables)
+             if not docker_image:
+                 raise Exception(
+                     f"Please provide a docker_image using executor_config of the step {node.name} or at global config"
+                 )
+
+             # TODO: Should we consider using getpass.getuser() when running the docker container? Volume permissions
+             container = client.containers.create(
+                 image=docker_image,
+                 command=command,
+                 auto_remove=False,
+                 volumes=self._volumes,
+                 network_mode="host",
+                 environment=environment,
+             )
+
+             container.start()
+             stream = api_client.logs(container=container.id, timestamps=True, stream=True, follow=True)
+             while True:
+                 try:
+                     output = next(stream).decode("utf-8")
+                     output = output.strip("\r\n")
+                     logger.info(output)
+                     print(output)
+                 except StopIteration:
+                     logger.info("Docker Run completed")
+                     break
+
+             exit_status = api_client.inspect_container(container.id)["State"]["ExitCode"]
+
+             if auto_remove_container:
+                 container.remove(force=True)
+
+             if exit_status != 0:
+                 msg = f"Docker command failed with exit code {exit_status}"
+                 raise Exception(msg)
+
+         except Exception as _e:
+             logger.exception("Problems with spinning/running the container")
+             raise _e
+
+
+ class LocalContainerComputeFileSystemRunLogstore(BaseIntegration):
+     """
+     Integration between the local container executor and the file system run log store
+     """
+
+     executor_type = "local-container"
+     service_type = "run_log_store"  # One of secret, catalog, datastore
+     service_provider = "file-system"  # The actual implementation of the service
+
+     def validate(self, **kwargs):
+         if self.executor._is_parallel_execution():  # pragma: no branch
+             msg = (
+                 "Run logs generated by the file-system run log store are not thread safe. "
+                 "Inconsistent results are possible because of race conditions to write to the same file.\n"
+                 "Consider using a partitioned run log store, like a database, for consistent results."
+             )
+             logger.warning(msg)
+
+     def configure_for_traversal(self, **kwargs):
+         from runnable.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore
+
+         self.executor = cast(LocalContainerExecutor, self.executor)
+         self.service = cast(FileSystemRunLogstore, self.service)
+
+         write_to = self.service.log_folder_name
+         self.executor._volumes[str(Path(write_to).resolve())] = {
+             "bind": f"{self.executor._container_log_location}",
+             "mode": "rw",
+         }
+
+     def configure_for_execution(self, **kwargs):
+         from runnable.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore
+
+         self.executor = cast(LocalContainerExecutor, self.executor)
+         self.service = cast(FileSystemRunLogstore, self.service)
+
+         self.service.log_folder = self.executor._container_log_location
+
+
+ class LocalContainerComputeFileSystemCatalog(BaseIntegration):
+     """
+     Integration pattern between the local container executor and the file system catalog
+     """
+
+     executor_type = "local-container"
+     service_type = "catalog"  # One of secret, catalog, datastore
+     service_provider = "file-system"  # The actual implementation of the service
+
+     def configure_for_traversal(self, **kwargs):
+         from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog
+
+         self.executor = cast(LocalContainerExecutor, self.executor)
+         self.service = cast(FileSystemCatalog, self.service)
+
+         catalog_location = self.service.catalog_location
+         self.executor._volumes[str(Path(catalog_location).resolve())] = {
+             "bind": f"{self.executor._container_catalog_location}",
+             "mode": "rw",
+         }
+
+     def configure_for_execution(self, **kwargs):
+         from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog
+
+         self.executor = cast(LocalContainerExecutor, self.executor)
+         self.service = cast(FileSystemCatalog, self.service)
+
+         self.service.catalog_location = self.executor._container_catalog_location
+
+
+ class LocalContainerComputeDotEnvSecrets(BaseIntegration):
+     """
+     Integration between the local container executor and dotenv secrets
+     """
+
+     executor_type = "local-container"
+     service_type = "secrets"  # One of secret, catalog, datastore
+     service_provider = "dotenv"  # The actual implementation of the service
+
+     def validate(self, **kwargs):
+         logger.warning("Using dotenv for non-local deployments is not ideal; consider other options")
+
+     def configure_for_traversal(self, **kwargs):
+         from runnable.extensions.secrets.dotenv.implementation import DotEnvSecrets
+
+         self.executor = cast(LocalContainerExecutor, self.executor)
+         self.service = cast(DotEnvSecrets, self.service)
+
+         secrets_location = self.service.secrets_location
+         self.executor._volumes[str(Path(secrets_location).resolve())] = {
+             "bind": f"{self.executor._container_secrets_location}",
+             "mode": "ro",
+         }
+
+     def configure_for_execution(self, **kwargs):
+         from runnable.extensions.secrets.dotenv.implementation import DotEnvSecrets
+
+         self.executor = cast(LocalContainerExecutor, self.executor)
+         self.service = cast(DotEnvSecrets, self.service)
+
+         self.service.location = self.executor._container_secrets_location
+
+
+ class LocalContainerComputeEnvSecretsManager(BaseIntegration):
+     """
+     Integration between the local container executor and the environment secrets manager
+     """
+
+     executor_type = "local-container"
+     service_type = "secrets"  # One of secret, catalog, datastore
+     service_provider = "env-secrets-manager"  # The actual implementation of the service
+
+     def validate(self, **kwargs):
+         msg = (
+             "Local container executions cannot be used with the environment secrets manager. "
+             "Please use a supported secrets manager"
+         )
+         logger.exception(msg)
+         raise Exception(msg)
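
To make the configuration described in the class docstring concrete, here is a minimal sketch of a config that exercises the fields defined above (docker_image, auto_remove_container, run_in_local, environment); the image names and step name are placeholders, not taken from the package:

    execution:
      type: local-container
      config:
        docker_image: my-org/pipeline:latest      # hypothetical default image
        auto_remove_container: true
        run_in_local: false                       # true runs the node on the host instead
        environment:
          LOG_LEVEL: INFO
    dag:
      steps:
        train:
          executor_config:
            local-container:
              docker_image: my-org/pipeline-gpu:latest   # hypothetical per-step override

As trigger_job shows, a step-level docker_image wins over the global one, and run_in_local: true bypasses the container entirely after validating the run log store, catalog, and secrets integrations.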
File without changes
@@ -0,0 +1,220 @@
+ import copy
+ import logging
+ from typing import Any, Dict, Type, cast
+
+ from pydantic import ConfigDict, Field
+
+ from runnable import context, defaults
+ from runnable.defaults import TypeMapVariable
+ from runnable.extensions.executor import GenericExecutor
+ from runnable.extensions.nodes import TaskNode
+ from runnable.integration import BaseIntegration
+ from runnable.nodes import BaseNode
+ from runnable.tasks import BaseTaskType
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ def create_executable(params: Dict[str, Any], model: Type[BaseTaskType], node_name: str) -> BaseTaskType:
+     class EasyModel(model):  # type: ignore
+         model_config = ConfigDict(extra="ignore")
+
+     swallow_all = EasyModel(**params, node_name=node_name)
+     return swallow_all
+
+
+ class MockedExecutor(GenericExecutor):
+     service_name: str = "mocked"
+
+     enable_parallel: bool = defaults.ENABLE_PARALLEL
+
+     patches: Dict[str, Any] = Field(default_factory=dict)
+
+     @property
+     def _context(self):
+         return context.run_context
+
+     def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
+         raise Exception("MockedExecutor does not support re-run")
+
+     def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         This is the entry point from the graph execution.
+
+         While self.execute_graph is responsible for traversing the graph, this function is responsible for
+         the actual execution of the node.
+
+         If the node type is:
+             * task: We can delegate to _execute_node after checking the eligibility for re-run in case of a re-run
+             * success: We can delegate to _execute_node
+             * fail: We can delegate to _execute_node
+
+         For nodes that are internally graphs:
+             * parallel: Delegate the responsibility of execution to node.execute_as_graph()
+             * dag: Delegate the responsibility of execution to node.execute_as_graph()
+             * map: Delegate the responsibility of execution to node.execute_as_graph()
+
+         Transpilers will NEVER use this method and will NEVER call this method.
+         This method should only be used by interactive executors.
+
+         Args:
+             node (Node): The node to execute
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the
+                 iterable. Defaults to None.
+         """
+         step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+
+         self.add_code_identities(node=node, step_log=step_log)
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+
+         # Add the step log to the database as per the situation.
+         # If it is a terminal node, complete it now
+         if node.node_type in ["success", "fail"]:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             self._execute_node(node, map_variable=map_variable, **kwargs)
+             return
+
+         # We call an internal function to iterate the sub graphs and execute them
+         if node.is_composite:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             node.execute_as_graph(map_variable=map_variable, **kwargs)
+             return
+
+         node_to_send: TaskNode = cast(TaskNode, node).model_copy(deep=True)
+         if node.name not in self.patches:
+             # The node is not patched, so mock it
+             step_log.mock = True
+         else:
+             # The node is patched; swap the executable for one built
+             # from the patch value
+             executable_type = node_to_send.executable.__class__
+             executable = create_executable(
+                 self.patches[node.name],
+                 executable_type,
+                 node_name=node.name,
+             )
+             node_to_send.executable = executable
+
+         # Executor specific way to trigger a job
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+         self.trigger_job(node=node_to_send, map_variable=map_variable, **kwargs)
+
+     def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         Call this method only if we are responsible for traversing the graph via
+         execute_from_graph().
+
+         We are not prepared to execute the node as of now.
+
+         Args:
+             node (BaseNode): The node to execute
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the
+                 iterable. Defaults to None.
+
+         NOTE: We do not raise an exception as this method is not required by many extensions
+         """
+         self.prepare_for_node_execution()
+         self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+     def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
+         """
+         In case of a re-run, this method checks the step status of the previous run to determine whether a re-run is
+         necessary.
+             * True: If it is not a re-run.
+             * True: If it is a re-run and we failed in the last run or the corresponding logs do not exist.
+             * False: If it is a re-run and we succeeded in the last run.
+
+         In most cases, this logic need not be touched.
+
+         Args:
+             node (Node): The node to check against re-run
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the
+                 iterable. Defaults to None.
+
+         Returns:
+             bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+         """
+         return True
+
+     def _resolve_executor_config(self, node: BaseNode):
+         """
+         The overrides section can contain specific overrides to a global executor config.
+         To avoid too much clutter in the dag definition, we allow the configuration file to have an overrides block.
+         The nodes can override the global config by referring to a key in the overrides.
+
+         This function also applies variables to the effective node config.
+
+         For example:
+         # configuration.yaml
+         execution:
+           type: cloud-implementation
+           config:
+             k1: v1
+             k3: v3
+           overrides:
+             custom_config:
+               k1: v11
+               k2: v2  # Could be a mapping internally.
+
+         # in pipeline definition.yaml
+         dag:
+           steps:
+             step1:
+               overrides:
+                 cloud-implementation: custom_config
+
+         This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'}
+
+         Args:
+             node (BaseNode): The current node being processed.
+         """
+         effective_node_config = copy.deepcopy(self.model_dump())
+
+         return effective_node_config
+
+     def execute_job(self, node: TaskNode):
+         pass
+
+     def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         For local execution, we just execute the node.
+
+         Args:
+             node (BaseNode): The node to execute
+             map_variable (dict[str, str], optional): The map variable, if any. Defaults to None.
+         """
+         self._execute_node(node=node, map_variable=map_variable, **kwargs)
+
+
+ class LocalContainerComputeFileSystemRunLogstore(BaseIntegration):
+     """
+     Integration between the mocked executor and the file system run log store
+     """
+
+     executor_type = "local-container"
+     service_type = "run_log_store"  # One of secret, catalog, datastore
+     service_provider = "file-system"  # The actual implementation of the service
+
+     def validate(self, **kwargs):
+         if self.executor._is_parallel_execution():  # pragma: no branch
+             msg = "Mocked executor does not support parallel execution."
+             logger.warning(msg)
+
+
+ class LocalContainerComputeChunkedFSRunLogstore(BaseIntegration):
+     """
+     Integration between the mocked executor and the chunked file system run log store
+     """
+
+     executor_type = "local-container"
+     service_type = "run_log_store"  # One of secret, catalog, datastore
+     service_provider = "chunked-fs"  # The actual implementation of the service
+
+     def validate(self, **kwargs):
+         if self.executor._is_parallel_execution():  # pragma: no branch
+             msg = "Mocked executor does not support parallel execution."
+             logger.warning(msg)
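
As a hedged sketch of the patches mechanism above (the step name and command are hypothetical, and the config shape is assumed to mirror the local-container example rather than taken from documentation): a step named in patches has its executable rebuilt from the patch values via create_executable, while every other task step is marked step_log.mock = True and skipped.

    execution:
      type: mocked
      config:
        patches:
          train:
            command: python tests/stubs/train_stub.py   # hypothetical replacement command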
File without changes
@@ -0,0 +1,94 @@
+ import functools
+ import logging
+ from typing import Any, Union
+
+ from pydantic import ConfigDict, PrivateAttr
+
+ from runnable import defaults
+ from runnable.experiment_tracker import BaseExperimentTracker
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class MLFlowExperimentTracker(BaseExperimentTracker):
+     """
+     An MLflow experiment tracker.
+
+     TODO: Need to set up credentials from secrets
+     """
+
+     service_name: str = "mlflow"
+
+     server_url: str
+     autolog: bool = False
+
+     _default_experiment_name: str = PrivateAttr(default="Default")
+     _active_run_id: str = PrivateAttr(default="")
+     _client: Any = PrivateAttr(default=None)
+
+     model_config = ConfigDict(extra="forbid")
+
+     def model_post_init(self, __context: Any) -> None:
+         try:
+             import mlflow
+         except ImportError:
+             raise Exception("You need to install mlflow to use MLFlowExperimentTracker.")
+
+         self._client = mlflow
+
+         self._client.set_tracking_uri(self.server_url)
+
+         if self.autolog:
+             self._client.autolog(log_models=False)
+
+     @functools.cached_property
+     def experiment_id(self):
+         experiment_name = self._default_experiment_name
+
+         # If a tag is provided, we should create that as our experiment
+         if self._context.tag:
+             experiment_name = self._context.tag
+
+         experiment = self._client.get_experiment_by_name(experiment_name)
+         if not experiment:
+             # Create the experiment and fetch it.
+             experiment = self._client.create_experiment(experiment_name)
+             experiment = self._client.get_experiment(experiment)
+
+         return experiment.experiment_id
+
+     @functools.cached_property
+     def run_name(self):
+         return self._context.run_id
+
+     @property
+     def client_context(self):
+         if self._active_run_id:
+             return self._client.start_run(
+                 run_id=self._active_run_id, experiment_id=self.experiment_id, run_name=self.run_name
+             )
+
+         active_run = self._client.start_run(run_name=self.run_name, experiment_id=self.experiment_id)
+         self._active_run_id = active_run.info.run_id
+         return active_run
+
+     def log_metric(self, key: str, value: Union[int, float], step: int = 0):
+         """
+         Sets the metric in the experiment tracking.
+
+         Args:
+             key (str): The key against which you want to store the value
+             value (float): The value of the metric
+         """
+         if not isinstance(value, (float, int)):
+             msg = f"Only float/int values are accepted as metrics. Setting the metric {key} as parameter {key}_{step}"
+             logger.warning(msg)
+             self.log_parameter(key=key, value=value, step=step)
+             return
+
+         with self.client_context as _:
+             self._client.log_metric(key, float(value), step=step or None)
+
+     def log_parameter(self, key: str, value: Any, step: int = 0):
+         with self.client_context as _:
+             self._client.log_param(key + f"_{str(step)}", value)
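
A minimal configuration sketch for this tracker, assuming an experiment_tracker block analogous to the execution blocks above (the block name and server URL are assumptions; server_url and autolog are the model's fields): when the run carries a tag, that tag becomes the MLflow experiment name, otherwise the Default experiment is used.

    experiment_tracker:
      type: mlflow
      config:
        server_url: http://localhost:5000   # placeholder tracking server
        autolog: true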