runnable 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +34 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +272 -0
- runnable/context.py +34 -0
- runnable/datastore.py +687 -0
- runnable/defaults.py +182 -0
- runnable/entrypoints.py +448 -0
- runnable/exceptions.py +94 -0
- runnable/executor.py +421 -0
- runnable/experiment_tracker.py +139 -0
- runnable/extensions/catalog/__init__.py +21 -0
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +227 -0
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
- runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
- runnable/extensions/executor/__init__.py +725 -0
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +1183 -0
- runnable/extensions/executor/argo/specification.yaml +51 -0
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
- runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
- runnable/extensions/executor/local/__init__.py +0 -0
- runnable/extensions/executor/local/implementation.py +70 -0
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +361 -0
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +189 -0
- runnable/extensions/experiment_tracker/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
- runnable/extensions/nodes.py +655 -0
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
- runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +136 -0
- runnable/extensions/run_log_store/generic_chunked.py +541 -0
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +100 -0
- runnable/extensions/secrets/env_secrets/__init__.py +0 -0
- runnable/extensions/secrets/env_secrets/implementation.py +42 -0
- runnable/graph.py +464 -0
- runnable/integration.py +205 -0
- runnable/interaction.py +404 -0
- runnable/names.py +546 -0
- runnable/nodes.py +501 -0
- runnable/parameters.py +183 -0
- runnable/pickler.py +102 -0
- runnable/sdk.py +472 -0
- runnable/secrets.py +95 -0
- runnable/tasks.py +395 -0
- runnable/utils.py +630 -0
- runnable-0.3.0.dist-info/METADATA +437 -0
- runnable-0.3.0.dist-info/RECORD +69 -0
- {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
- runnable-0.3.0.dist-info/entry_points.txt +44 -0
- runnable-0.1.0.dist-info/METADATA +0 -16
- runnable-0.1.0.dist-info/RECORD +0 -6
- /runnable/{.gitkeep → extensions/__init__.py} +0 -0
- {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
@@ -0,0 +1,361 @@
|
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Dict, cast
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
from rich import print
|
7
|
+
|
8
|
+
from runnable import defaults, integration, utils
|
9
|
+
from runnable.datastore import StepLog
|
10
|
+
from runnable.defaults import TypeMapVariable
|
11
|
+
from runnable.extensions.executor import GenericExecutor
|
12
|
+
from runnable.extensions.nodes import TaskNode
|
13
|
+
from runnable.integration import BaseIntegration
|
14
|
+
from runnable.nodes import BaseNode
|
15
|
+
|
16
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
17
|
+
|
18
|
+
|
19
|
+
class LocalContainerExecutor(GenericExecutor):
    """
    In the mode of local-container, we execute all the commands in a container.

    Ensure that the local compute has enough resources to finish all your jobs.

    The image of the run, could either be provided as default in the configuration of the execution engine
    i.e.:
        execution:
          type: 'local-container'
          config:
            docker_image: the image you want the code to run in.

    or default image could be over-ridden for a single node by providing a docker_image in the step config.
    i.e:
        dag:
          steps:
            step:
              executor_config:
                local-container:
                  docker_image: The image that you want that single step to run in.
    This image would only be used for that step only.

    This mode does not build the docker image with the latest code for you, it is still left for the user to build
    and ensure that the docker image provided is the correct one.

    Example config:
        execution:
          type: local-container
          config:
            docker_image: The default docker image to use if the node does not provide one.
    """

    service_name: str = "local-container"
    docker_image: str
    auto_remove_container: bool = True
    run_in_local: bool = False
    environment: Dict[str, str] = Field(default_factory=dict)

    _local: bool = False

    # Paths used as the "bind" target of the volumes registered by the integration classes of this module.
    _container_log_location = "/tmp/run_logs/"
    _container_catalog_location = "/tmp/catalog/"
    _container_secrets_location = "/tmp/dotenv"
    # Host path -> {"bind": <container path>, "mode": <"rw"|"ro">}; handed to docker as ``volumes``.
    _volumes: Dict[str, Dict[str, str]] = {}

    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
        """
        Call the Base class to add the git code identity and add docker identity

        Terminal nodes (``success``/``fail``) only get the identities added by the base class.

        Args:
            node (BaseNode): The node we are adding the code identity
            step_log (Object): The step log corresponding to the node
        """

        super().add_code_identities(node, step_log)

        if node.node_type in ["success", "fail"]:
            # Need not add code identities if we are in a success or fail node
            return

        executor_config = self._resolve_executor_config(node)

        docker_image = executor_config.get("docker_image", None)
        if docker_image:
            code_id = self._context.run_log_store.create_code_identity()

            code_id.code_identifier = utils.get_local_docker_image_id(docker_image)
            code_id.code_identifier_type = "docker"
            code_id.code_identifier_dependable = True
            code_id.code_identifier_url = "local docker host"
            step_log.code_identities.append(code_id)

    def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
        """
        We are already in the container, we just execute the node.
        The node is already prepared for execution.

        Args:
            node (BaseNode): The node to execute
            map_variable (TypeMapVariable, optional): Forwarded unchanged to ``_execute_node``. Defaults to None.

        Returns:
            Whatever ``_execute_node`` returns.
        """
        return self._execute_node(node, map_variable, **kwargs)

    def _warn_container_failure(self) -> None:
        """Log the warning emitted when the step log of a containerised run is not in SUCCESS state."""
        msg = (
            "Node execution inside the container failed. Please check the logs.\n"
            # The trailing space keeps the two sentence halves from running together.
            "Note: If you do not see any docker issue from your side and the code works properly on local execution "
            "please raise a bug report."
        )
        logger.warning(msg)

    def execute_job(self, node: TaskNode):
        """
        Set up the step log and run the job command for the node in a container.

        The step log is created in PROCESSING state and stored before the container is started.
        After the container finishes, the step log is read back and a warning is logged
        if its status is not SUCCESS.

        Args:
            node (TaskNode): The task node to run as a job
        """

        step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable=None))

        self.add_code_identities(node=node, step_log=step_log)

        step_log.step_type = node.node_type
        step_log.status = defaults.PROCESSING
        self._context.run_log_store.add_step_log(step_log, self._context.run_id)

        command = utils.get_job_execution_command(node)
        self._spin_container(node=node, command=command)

        # Check the step log status and warn if necessary. Docker errors are generally suppressed.
        step_log = self._context.run_log_store.get_step_log(
            node._get_step_log_name(map_variable=None), self._context.run_id
        )
        if step_log.status != defaults.SUCCESS:
            self._warn_container_failure()

    def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
        """
        We come into this step via execute from graph, use trigger job to spin up the container.

        If the config has "run_in_local: True", we compute it on local system instead of container.
        In local container execution, we just spin the container to execute runnable execute_single_node.

        After a container run, the step log is read back; if its status is not SUCCESS a warning is
        logged and the step log is stored again with status FAIL.

        Args:
            node (BaseNode): The node we are currently executing
            map_variable (TypeMapVariable, optional): If the node is part of the map branch. Defaults to None.
        """
        executor_config = self._resolve_executor_config(node)
        auto_remove_container = executor_config.get("auto_remove_container", True)

        logger.debug("Here is the resolved executor config")
        logger.debug(executor_config)

        if executor_config.get("run_in_local", False):
            # Do not change config but only validate the configuration.
            # Trigger the job on local system instead of a container
            integration.validate(self, self._context.run_log_store)
            integration.validate(self, self._context.catalog_handler)
            integration.validate(self, self._context.secrets_handler)

            self.execute_node(node=node, map_variable=map_variable, **kwargs)
            return

        command = utils.get_node_execution_command(node, map_variable=map_variable)

        self._spin_container(
            node=node,
            command=command,
            map_variable=map_variable,
            auto_remove_container=auto_remove_container,
            **kwargs,
        )

        step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id)
        if step_log.status != defaults.SUCCESS:
            self._warn_container_failure()
            step_log.status = defaults.FAIL
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)

    def _spin_container(
        self,
        node: BaseNode,
        command: str,
        map_variable: TypeMapVariable = None,
        auto_remove_container: bool = True,
        **kwargs,
    ):
        """
        During the flow run, we have to spin up a container with the docker image mentioned
        and the right log locations

        Args:
            node (BaseNode): The node whose resolved executor config supplies the image and environment
            command (str): The command to run inside the container
            map_variable (TypeMapVariable, optional): Accepted for signature parity; not used here.
            auto_remove_container (bool): Force-remove the container once its exit code has been read.

        Raises:
            Exception: If docker cannot be reached, no docker_image is configured,
                or the container exits with a non-zero code.
        """
        # Conditional import
        import docker  # pylint: disable=C0415

        try:
            client = docker.from_env()
            api_client = docker.APIClient()
        except Exception as ex:
            logger.exception("Could not get access to docker")
            raise Exception("Could not get the docker socket file, do you have docker installed?") from ex

        try:
            logger.info(f"Running the command {command}")
            print(command)
            # Overrides global config with local
            executor_config = self._resolve_executor_config(node)

            docker_image = executor_config.get("docker_image", None)
            environment = executor_config.get("environment", {})
            # Variables of the run context win over the configured environment on key clashes.
            environment.update(self._context.variables)
            if not docker_image:
                raise Exception(
                    f"Please provide a docker_image using executor_config of the step {node.name} or at global config"
                )

            # TODO: Should consider using getpass.getuser() when running the docker container? Volume permissions
            container = client.containers.create(
                image=docker_image,
                command=command,
                # Removal is done explicitly below so that the exit code can be inspected first.
                auto_remove=False,
                volumes=self._volumes,
                network_mode="host",
                environment=environment,
            )

            container.start()
            stream = api_client.logs(container=container.id, timestamps=True, stream=True, follow=True)
            # Relay the container output line by line until the log stream is exhausted.
            for raw_line in stream:
                output = raw_line.decode("utf-8").strip("\r\n")
                logger.info(output)
                print(output)
            logger.info("Docker Run completed")

            exit_status = api_client.inspect_container(container.id)["State"]["ExitCode"]

            if auto_remove_container:
                container.remove(force=True)

            if exit_status != 0:
                msg = f"Docker command failed with exit code {exit_status}"
                raise Exception(msg)

        except Exception:
            logger.exception("Problems with spinning/running the container")
            # Bare raise re-raises the active exception with its traceback untouched.
            raise
|
251
|
+
|
252
|
+
|
253
|
+
class LocalContainerComputeFileSystemRunLogstore(BaseIntegration):
    """
    Integration of the local-container executor with the file-system run log store.
    """

    executor_type = "local-container"
    service_type = "run_log_store"  # One of secret, catalog, datastore
    service_provider = "file-system"  # The actual implementation of the service

    def configure_for_traversal(self, **kwargs):
        """Register the resolved host run-log folder as a read-write volume on the executor."""
        from runnable.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore

        self.executor = cast(LocalContainerExecutor, self.executor)
        self.service = cast(FileSystemRunLogstore, self.service)

        mount_spec = {"bind": f"{self.executor._container_log_location}", "mode": "rw"}
        host_log_folder = str(Path(self.service.log_folder_name).resolve())
        self.executor._volumes[host_log_folder] = mount_spec

    def configure_for_execution(self, **kwargs):
        """Point the run log store at the executor's container-side log location."""
        from runnable.extensions.run_log_store.file_system.implementation import FileSystemRunLogstore

        self.executor = cast(LocalContainerExecutor, self.executor)
        self.service = cast(FileSystemRunLogstore, self.service)

        container_log_folder = self.executor._container_log_location
        self.service.log_folder = container_log_folder
|
281
|
+
|
282
|
+
|
283
|
+
class LocalContainerComputeFileSystemCatalog(BaseIntegration):
    """
    Integration of the local-container executor with the file-system catalog.
    """

    executor_type = "local-container"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "file-system"  # The actual implementation of the service

    def configure_for_traversal(self, **kwargs):
        """Register the resolved host catalog folder as a read-write volume on the executor."""
        from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog

        self.executor = cast(LocalContainerExecutor, self.executor)
        self.service = cast(FileSystemCatalog, self.service)

        mount_spec = {"bind": f"{self.executor._container_catalog_location}", "mode": "rw"}
        host_catalog_folder = str(Path(self.service.catalog_location).resolve())
        self.executor._volumes[host_catalog_folder] = mount_spec

    def configure_for_execution(self, **kwargs):
        """Point the catalog at the executor's container-side catalog location."""
        from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog

        self.executor = cast(LocalContainerExecutor, self.executor)
        self.service = cast(FileSystemCatalog, self.service)

        container_catalog_folder = self.executor._container_catalog_location
        self.service.catalog_location = container_catalog_folder
|
311
|
+
|
312
|
+
|
313
|
+
class LocalContainerComputeDotEnvSecrets(BaseIntegration):
    """
    Integration of the local-container executor with the dotenv secrets handler.
    """

    executor_type = "local-container"
    service_type = "secrets"  # One of secret, catalog, datastore
    service_provider = "dotenv"  # The actual implementation of the service

    def validate(self, **kwargs):
        """Warn that dotenv secrets are being used; the combination is allowed, so nothing is raised."""
        logger.warning("Using dot env for non local deployments is not ideal, consider options")

    def configure_for_traversal(self, **kwargs):
        """Register the resolved host secrets location as a read-only volume on the executor."""
        from runnable.extensions.secrets.dotenv.implementation import DotEnvSecrets

        self.executor = cast(LocalContainerExecutor, self.executor)
        self.service = cast(DotEnvSecrets, self.service)

        mount_spec = {"bind": f"{self.executor._container_secrets_location}", "mode": "ro"}
        host_secrets_path = str(Path(self.service.secrets_location).resolve())
        self.executor._volumes[host_secrets_path] = mount_spec

    def configure_for_execution(self, **kwargs):
        """Point the secrets handler at the executor's container-side secrets location."""
        from runnable.extensions.secrets.dotenv.implementation import DotEnvSecrets

        self.executor = cast(LocalContainerExecutor, self.executor)
        self.service = cast(DotEnvSecrets, self.service)

        container_secrets_path = self.executor._container_secrets_location
        self.service.location = container_secrets_path
|
344
|
+
|
345
|
+
|
346
|
+
class LocalContainerComputeEnvSecretsManager(BaseIntegration):
    """
    Integration between local container and env secrets manager

    This combination is rejected: ``validate`` always raises.
    """

    executor_type = "local-container"
    service_type = "secrets"  # One of secret, catalog, datastore
    service_provider = "env-secrets-manager"  # The actual implementation of the service

    def validate(self, **kwargs):
        """
        Reject the use of the environment secrets manager with the local-container executor.

        Raises:
            Exception: Always, with a message asking for a supported secrets manager.
        """
        msg = (
            "Local container executions cannot be used with environment secrets manager. "
            "Please use a supported secrets manager"
        )
        # logger.error, not logger.exception: no exception is being handled here, so there is
        # no traceback to attach to the log record.
        logger.error(msg)
        raise Exception(msg)
|
File without changes
|
@@ -0,0 +1,189 @@
|
|
1
|
+
import copy
|
2
|
+
import logging
|
3
|
+
from typing import Any, Dict, Type, cast
|
4
|
+
|
5
|
+
from pydantic import ConfigDict, Field
|
6
|
+
|
7
|
+
from runnable import context, defaults
|
8
|
+
from runnable.defaults import TypeMapVariable
|
9
|
+
from runnable.extensions.executor import GenericExecutor
|
10
|
+
from runnable.extensions.nodes import TaskNode
|
11
|
+
from runnable.nodes import BaseNode
|
12
|
+
from runnable.tasks import BaseTaskType
|
13
|
+
|
14
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
15
|
+
|
16
|
+
|
17
|
+
def create_executable(params: Dict[str, Any], model: Type[BaseTaskType], node_name: str) -> BaseTaskType:
    """
    Build an instance of a subclass of ``model`` that is configured with ``extra="ignore"``.

    Args:
        params (Dict[str, Any]): Keyword arguments passed to the model constructor.
        model (Type[BaseTaskType]): The task type to derive the permissive subclass from.
        node_name (str): Passed to the constructor as ``node_name``.

    Returns:
        BaseTaskType: The constructed instance of the derived class.
    """

    class EasyModel(model):  # type: ignore
        # Override the parent's extra-field policy for this throwaway subclass.
        model_config = ConfigDict(extra="ignore")

    return EasyModel(**params, node_name=node_name)
|
23
|
+
|
24
|
+
|
25
|
+
class MockedExecutor(GenericExecutor):
    """
    Executor that traverses the graph and marks task nodes as mocked unless a patch is provided.

    ``patches`` maps a node name to the parameters used to rebuild that node's executable
    (see ``create_executable``); nodes without an entry get ``step_log.mock = True``.
    """

    service_name: str = "mocked"
    _local_executor: bool = True

    patches: Dict[str, Any] = Field(default_factory=dict)

    @property
    def _context(self):
        """The module-level run context (``context.run_context``)."""
        return context.run_context

    def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
        """Re-runs are not supported by this executor; always raises."""
        raise Exception("MockedExecutor does not support re-run")

    def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
        """
        Entry point used by the graph traversal to execute a single node.

        A step log is created in PROCESSING state for every node, then:
            * success / fail nodes: the step log is stored and the node is run via _execute_node.
            * composite nodes (node.is_composite): the step log is stored and execution is
              delegated to node.execute_as_graph().
            * any other node: a deep copy of the node is prepared. If the node name has an entry
              in ``patches`` its executable is replaced by one built from that entry, otherwise
              the step log is flagged as mocked. The step log is stored and the copy is handed
              to trigger_job.

        Args:
            node (BaseNode): The node to execute
            map_variable (TypeMapVariable, optional): If the node is part of a map state, the value of
                the iterable. Defaults to None.
        """
        step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))

        self.add_code_identities(node=node, step_log=step_log)

        step_log.status = defaults.PROCESSING
        step_log.step_type = node.node_type

        # Terminal nodes are completed right away.
        if node.node_type in ("success", "fail"):
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            self._execute_node(node, map_variable=map_variable, **kwargs)
            return

        # Composite nodes iterate over their own sub graphs.
        if node.is_composite:
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            node.execute_as_graph(map_variable=map_variable, **kwargs)
            return

        task_node: TaskNode = cast(TaskNode, node).model_copy(deep=True)
        if node.name in self.patches:
            # A patch exists for this node: rebuild the executable of the copy from the
            # patch value, keeping the class of the current executable as the base.
            task_node.executable = create_executable(
                self.patches[node.name],
                task_node.executable.__class__,
                node_name=node.name,
            )
        else:
            # No patch for this node, so flag the step as mocked.
            step_log.mock = True

        # Store the step log before handing the (possibly patched) copy over for execution.
        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
        self.trigger_job(node=task_node, map_variable=map_variable, **kwargs)

    def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
        """
        Prepare for node execution and then execute the node.

        Called from execute_from_graph() while this executor traverses the graph.

        Args:
            node (BaseNode): The node to execute
            map_variable (TypeMapVariable, optional): If the node is part of a map state, the value of
                the iterable. Defaults to None.
        """
        self.prepare_for_node_execution()
        self.execute_node(node=node, map_variable=map_variable, **kwargs)

    # TODO: This needs to go away
    def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
        """
        Report whether the step should be executed in the context of a re-run.

        This executor does not consult any previous run: every step is reported as eligible.

        Args:
            node (BaseNode): The node to check against re-run
            map_variable (TypeMapVariable, optional): If the node is part of a map state, the value of
                the iterable. Defaults to None.

        Returns:
            bool: Always True.
        """
        return True

    def _resolve_executor_config(self, node: BaseNode):
        """
        Return the effective executor configuration for the node.

        No per-node overrides are applied here: the result is a deep copy of this executor's
        own ``model_dump()``, whatever the node is.

        Args:
            node (BaseNode): The current node being processed (not consulted).

        Returns:
            dict: A deep copy of the dumped executor model.
        """
        dumped_config = self.model_dump()
        return copy.deepcopy(dumped_config)

    def execute_job(self, node: TaskNode):
        """Jobs are a no-op for this executor."""
        pass

    def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
        """
        Execute the node by delegating to ``_execute_node``.

        Args:
            node (BaseNode): The node to execute
            map_variable (TypeMapVariable, optional): If the node is part of a map state, the value of
                the iterable. Defaults to None.
        """
        self._execute_node(node=node, map_variable=map_variable, **kwargs)
|
File without changes
|
File without changes
|
@@ -0,0 +1,94 @@
|
|
1
|
+
import functools
|
2
|
+
import logging
|
3
|
+
from typing import Any, Union
|
4
|
+
|
5
|
+
from pydantic import ConfigDict, PrivateAttr
|
6
|
+
|
7
|
+
from runnable import defaults
|
8
|
+
from runnable.experiment_tracker import BaseExperimentTracker
|
9
|
+
|
10
|
+
# Module-level logger for the MLflow experiment tracker.
# NOTE(review): the local-container and mocked executor modules of this package obtain their
# logger via defaults.LOGGER_NAME; confirm that defaults.NAME resolves to the same logger.
logger = logging.getLogger(defaults.NAME)
|
11
|
+
|
12
|
+
|
13
|
+
class MLFlowExperimentTracker(BaseExperimentTracker):
    """
    A MLFlow experiment tracker.

    The ``mlflow`` module is imported lazily when the model is initialised and is used as the
    client for all tracking calls.

    TODO: Need to set up credentials from secrets
    """

    service_name: str = "mlflow"

    server_url: str
    autolog: bool = False

    _default_experiment_name: str = PrivateAttr(default="Default")
    # Run id of the mlflow run started by this tracker; empty until the first run is started.
    _active_run_id: str = PrivateAttr(default="")
    # Holds the imported ``mlflow`` module once model_post_init has run.
    _client: Any = PrivateAttr(default=None)

    model_config = ConfigDict(extra="forbid")

    def model_post_init(self, __context: Any) -> None:
        """
        Import mlflow, point it at ``server_url`` and optionally enable autologging.

        Raises:
            Exception: If mlflow is not installed.
        """
        try:
            import mlflow
        except ImportError as ex:
            raise Exception("You need to install mlflow to use MLFlowExperimentTracker.") from ex

        self._client = mlflow

        self._client.set_tracking_uri(self.server_url)

        if self.autolog:
            self._client.autolog(log_models=False)

    @functools.cached_property
    def experiment_id(self):
        """
        The id of the experiment to log against, creating the experiment if it does not exist.

        The experiment name is the tag of the run context when one is set, otherwise the
        default experiment name.
        """
        experiment_name = self._default_experiment_name

        # If a tag is provided, we should create that as our experiment
        if self._context.tag:
            experiment_name = self._context.tag

        experiment = self._client.get_experiment_by_name(experiment_name)
        if not experiment:
            # Create the experiment and get it.
            experiment = self._client.create_experiment(experiment_name)
            experiment = self._client.get_experiment(experiment)

        return experiment.experiment_id

    @functools.cached_property
    def run_name(self):
        """The run id of the run context, used as the mlflow run name."""
        return self._context.run_id

    @property
    def client_context(self):
        """
        Start an mlflow run and return it, for use as a context manager.

        The first call starts a new run and remembers its run id; later calls pass that
        run id to ``start_run``.
        """
        if self._active_run_id:
            return self._client.start_run(
                run_id=self._active_run_id, experiment_id=self.experiment_id, run_name=self.run_name
            )

        active_run = self._client.start_run(run_name=self.run_name, experiment_id=self.experiment_id)
        self._active_run_id = active_run.info.run_id
        return active_run

    def log_metric(self, key: str, value: Union[int, float], step: int = 0):
        """
        Sets the metric in the experiment tracking.

        Values that are neither int nor float are not accepted as metrics; they are logged
        as a parameter via ``log_parameter`` instead, and a warning is emitted.

        Args:
            key (str): The key against you want to store the value
            value (Union[int, float]): The value of the metric
            step (int): The step of the metric. Defaults to 0, which is passed to mlflow as None.
        """
        # A single isinstance with a tuple avoids the precedence trap of
        # ``not isinstance(value, float) or isinstance(value, int)``, which rejected every int.
        # Note that bool is a subclass of int and is therefore accepted as a metric.
        if not isinstance(value, (int, float)):
            msg = f"Only float/int values are accepted as metrics. Setting the metric {key} as parameter {key}_{step}"
            logger.warning(msg)
            self.log_parameter(key=key, value=value, step=step)
            return

        with self.client_context as _:
            self._client.log_metric(key, float(value), step=step or None)

    def log_parameter(self, key: str, value: Any, step: int = 0):
        """
        Log a parameter under the name ``{key}_{step}``.

        Args:
            key (str): The base name of the parameter
            value (Any): The value of the parameter
            step (int): Suffix appended to the key. Defaults to 0.
        """
        with self.client_context as _:
            self._client.log_param(key + f"_{str(step)}", value)
|