runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/context.py ADDED
@@ -0,0 +1,730 @@
1
+ import contextvars
2
+ import hashlib
3
+ import importlib
4
+ import json
5
+ import logging
6
+ import os
7
+ import sys
8
+ from datetime import datetime
9
+ from enum import Enum
10
+ from functools import cached_property, partial
11
+ from typing import TYPE_CHECKING, Annotated, Any, Callable, Dict, Optional
12
+
13
+ import logfire_api as logfire
14
+ from pydantic import (
15
+ BaseModel,
16
+ BeforeValidator,
17
+ ConfigDict,
18
+ Field,
19
+ computed_field,
20
+ field_validator,
21
+ )
22
+ from stevedore import driver
23
+
24
+ from runnable import console, defaults, exceptions, names, utils
25
+ from runnable.catalog import BaseCatalog
26
+ from runnable.datastore import BaseRunLogStore
27
+ from runnable.executor import BaseJobExecutor, BasePipelineExecutor
28
+ from runnable.graph import Graph, create_graph
29
+ from runnable.nodes import BaseNode
30
+ from runnable.pickler import BasePickler
31
+ from runnable.secrets import BaseSecrets
32
+ from runnable.tasks import BaseTaskType
33
+
34
+
35
+ logger = logging.getLogger(defaults.LOGGER_NAME)
36
+
37
+
38
+ def get_pipeline_spec_from_yaml(pipeline_file: str) -> Graph:
39
+ """
40
+ Reads the pipeline definition from a YAML file and returns it as a Graph
41
+ """
42
+ pipeline_config = utils.load_yaml(pipeline_file)
43
+ logger.info("The input pipeline:")
44
+ logger.info(json.dumps(pipeline_config, indent=4))
45
+
46
+ dag_config = pipeline_config["dag"]
47
+
48
+ dag = create_graph(dag_config)
49
+ return dag
50
+
51
+
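As the loader above shows, the YAML file only needs a top-level dag mapping, which is handed verbatim to create_graph. A minimal usage sketch; the file name is hypothetical and the nested shape of the dag mapping is defined by runnable/graph.py, which is not shown in this diff:

from runnable.context import get_pipeline_spec_from_yaml

# "pipeline.yaml" is an illustrative path; the file must carry a top-level
# `dag:` mapping, which create_graph() turns into a Graph object.
dag = get_pipeline_spec_from_yaml("pipeline.yaml")
print(type(dag))  # runnable.graph.Graph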
52
+ def get_pipeline_spec_from_python(python_module: str) -> Graph:
53
+ # Call the SDK to get the dag
54
+ # Import the module and call the function to get the dag
55
+ module_file = python_module.removesuffix(".py")
56
+ module, func = utils.get_module_and_attr_names(module_file)
57
+ sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
58
+ imported_module = importlib.import_module(module)
59
+
60
+ dag = getattr(imported_module, func)().return_dag()
61
+
62
+ return dag
63
+
64
+
65
+ def get_job_spec_from_python(
66
+ job_file: str,
67
+ ) -> tuple[BaseTaskType, list[str]]:
68
+ """
69
+ Reads the job definition from a Python file and returns the task and its catalog settings
70
+ """
71
+ from runnable.sdk import BaseJob
72
+
73
+ # Import the module and call the function to get the job
74
+ module_file = job_file.removesuffix(".py")
75
+ module, func = utils.get_module_and_attr_names(module_file)
76
+ sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
77
+ imported_module = importlib.import_module(module)
78
+
79
+ job: BaseJob = getattr(imported_module, func)()
80
+
81
+ return job.get_task(), job.catalog.put if job.catalog else []
82
+
83
+
84
+ def get_service_by_name(namespace: str, service_config: dict[str, Any], _) -> Any: # noqa: ANN401, ANN001
85
+ """Get the service by name."""
86
+ service_config = service_config.copy()
87
+
88
+ kind = service_config.pop("type", None)
89
+
90
+ if "config" in service_config:
91
+ service_config = service_config.get("config", {})
92
+
93
+ logger.debug(
94
+ f"Trying to get a service of {namespace} with config: {service_config}"
95
+ )
96
+ try:
97
+ mgr: driver.DriverManager = driver.DriverManager(
98
+ namespace=namespace, # eg: reader
99
+ name=kind, # eg: csv, pdf
100
+ invoke_on_load=True,
101
+ invoke_kwds={**service_config},
102
+ )
103
+ return mgr.driver
104
+ except Exception as _e:
105
+ raise Exception(
106
+ f"Could not find the service of kind: {kind} in namespace:{namespace} with config: {service_config}"
107
+ ) from _e
108
+
109
+
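The config dict that reaches get_service_by_name therefore carries the plugin name under "type" and, optionally, its keyword arguments under "config". A hedged sketch of a direct call; the entry-point name "file-system" and the catalog_location argument are illustrative, the real names live in entry_points.txt and the extension modules:

from runnable.context import get_service_by_name

catalog = get_service_by_name(
    "catalog",                                       # stevedore namespace
    {
        "type": "file-system",                       # entry-point name (illustrative)
        "config": {"catalog_location": ".catalog"},  # plugin kwargs (illustrative)
    },
    None,                                            # validator-info slot, unused here
)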
110
+ def get_service(service: str) -> Callable:
111
+ """Get the service by name.
112
+
113
+ Args:
114
+ service (str): service name.
115
+
116
+ Returns:
117
+ Callable: callable function of service.
118
+ """
119
+ return partial(get_service_by_name, service)
120
+
121
+
122
+ InstantiatedCatalog = Annotated[BaseCatalog, BeforeValidator(get_service("catalog"))]
123
+ InstantiatedSecrets = Annotated[BaseSecrets, BeforeValidator(get_service("secrets"))]
124
+ InstantiatedPickler = Annotated[BasePickler, BeforeValidator(get_service("pickler"))]
125
+ InstantiatedRunLogStore = Annotated[
126
+ BaseRunLogStore, BeforeValidator(get_service("run_log_store"))
127
+ ]
128
+ InstantiatedPipelineExecutor = Annotated[
129
+ BasePipelineExecutor, BeforeValidator(get_service("pipeline_executor"))
130
+ ]
131
+ InstantiatedJobExecutor = Annotated[
132
+ BaseJobExecutor, BeforeValidator(get_service("job_executor"))
133
+ ]
134
+
135
+
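These Annotated[..., BeforeValidator(...)] aliases are what let the context models below accept a plain config mapping and receive a fully instantiated service. A self-contained sketch of the same pattern, using a toy class and a local builder instead of the stevedore lookup:

from typing import Annotated, Any

from pydantic import BaseModel, BeforeValidator


class ToyCatalog:  # stand-in for a BaseCatalog plugin
    def __init__(self, location: str = ".catalog") -> None:
        self.location = location


def build_catalog(value: Any) -> Any:
    # Mirrors get_service_by_name minus stevedore: a mapping becomes an instance.
    if isinstance(value, dict):
        cfg = dict(value)
        cfg.pop("type", None)
        return ToyCatalog(**cfg.get("config", cfg))
    return value


ToyInstantiatedCatalog = Annotated[ToyCatalog, BeforeValidator(build_catalog)]


class Demo(BaseModel):
    model_config = {"arbitrary_types_allowed": True}
    catalog: ToyInstantiatedCatalog


demo = Demo(catalog={"type": "file-system", "config": {"location": "/tmp/cat"}})
assert demo.catalog.location == "/tmp/cat"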
136
+ class ExecutionMode(str, Enum):
137
+ YAML = "yaml"
138
+ PYTHON = "python"
139
+
140
+
141
+ class ExecutionContext(str, Enum):
142
+ PIPELINE = "pipeline"
143
+ JOB = "job"
144
+
145
+
146
+ class ServiceConfigurations(BaseModel):
147
+ configuration_file: Optional[str] = Field(
148
+ default=None, exclude=True, description="Path to the configuration file."
149
+ )
150
+ execution_context: ExecutionContext = ExecutionContext.PIPELINE
151
+ variables: dict[str, str] = Field(
152
+ default_factory=utils.gather_variables,
153
+ exclude=True,
154
+ description="Variables to be used.",
155
+ )
156
+
157
+ @field_validator("configuration_file", mode="before")
158
+ @classmethod
159
+ def override_configuration_file(cls, configuration_file: str | None) -> str | None:
160
+ """Determine the configuration file to use, following the order of precedence."""
161
+ # 1. Environment variable
162
+ env_config = os.environ.get(defaults.RUNNABLE_CONFIGURATION_FILE)
163
+ if env_config:
164
+ return env_config
165
+
166
+ # 2. User-provided at runtime
167
+ if configuration_file:
168
+ return configuration_file
169
+
170
+ # 3. Default user config file
171
+ if utils.does_file_exist(defaults.USER_CONFIG_FILE):
172
+ return defaults.USER_CONFIG_FILE
173
+
174
+ # 4. No config file
175
+ return None
176
+
177
+ @computed_field # type: ignore
178
+ @property
179
+ def services(self) -> dict[str, Any]:
180
+ """Get the effective services"""
181
+ _services = defaults.DEFAULT_SERVICES.copy()
182
+
183
+ if not self.configuration_file:
184
+ return _services
185
+
186
+ # Load the configuration file
187
+ templated_config = utils.load_yaml(self.configuration_file)
188
+ config = utils.apply_variables(templated_config, self.variables)
189
+
190
+ for key, value in config.items():
191
+ _services[key.replace("-", "_")] = value
192
+
193
+ if self.execution_context == ExecutionContext.JOB:
194
+ _services.pop("pipeline_executor", None)
195
+ elif self.execution_context == ExecutionContext.PIPELINE:
196
+ _services.pop("job_executor", None)
197
+ else:
198
+ raise ValueError(
199
+ f"Invalid execution context: {self.execution_context}. Must be 'pipeline' or 'job'."
200
+ )
201
+
202
+ return _services
203
+
204
+
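Putting the precedence rules and the services property together, the class can be exercised on its own. A hedged sketch; the concrete default service entries come from defaults.DEFAULT_SERVICES, which is not shown in this diff:

from runnable.context import ExecutionContext, ServiceConfigurations

svc = ServiceConfigurations(
    configuration_file=None,                 # resolved via env var, then user config file, then None
    execution_context=ExecutionContext.JOB,
)
# With no configuration file resolved, the packaged defaults are returned as-is;
# when a file is loaded, the entry irrelevant to the execution context
# ("pipeline_executor" for jobs, "job_executor" for pipelines) is dropped.
print(svc.services)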
205
+ class RunnableContext(BaseModel):
206
+ model_config = ConfigDict(use_enum_values=True, loc_by_alias=True)
207
+
208
+ execution_mode: ExecutionMode = ExecutionMode.PYTHON
209
+
210
+ parameters_file: Optional[str] = Field(
211
+ default=None, exclude=True, description="Path to the parameters file."
212
+ )
213
+ configuration_file: Optional[str] = Field(
214
+ default=None, exclude=True, description="Path to the configuration file."
215
+ )
216
+ variables: dict[str, str] = Field(
217
+ default_factory=utils.gather_variables,
218
+ exclude=True,
219
+ description="Variables to be used.",
220
+ )
221
+ run_id: str = Field(
222
+ description="The run ID, generated automatically if not provided"
223
+ )
224
+ tag: Optional[str] = Field(default=None, description="Tag to be used for the run.")
225
+
226
+ # TODO: Verify the design
227
+ object_serialisation: bool = (
228
+ True # Should be validated against executor being local
229
+ )
230
+ return_objects: Dict[
231
+ str, Any
232
+ ] = {} # Should be validated against executor being local, should this be here?
233
+
234
+ @field_validator("parameters_file", mode="before")
235
+ @classmethod
236
+ def override_parameters_file(cls, parameters_file: str) -> str:
237
+ """Override the parameters file if provided."""
238
+ if os.environ.get(defaults.RUNNABLE_PARAMETERS_FILE, None):
239
+ return os.environ.get(defaults.RUNNABLE_PARAMETERS_FILE, parameters_file)
240
+ return parameters_file
241
+
242
+ @field_validator("configuration_file", mode="before")
243
+ @classmethod
244
+ def override_configuration_file(cls, configuration_file: str) -> str:
245
+ """Override the configuration file if provided."""
246
+ return os.environ.get(defaults.RUNNABLE_CONFIGURATION_FILE, configuration_file)
247
+
248
+ @field_validator("run_id", mode="before")
249
+ @classmethod
250
+ def generate_run_id(cls, run_id: str) -> str:
251
+ """Generate a run id if not provided."""
252
+ # Convert None to empty string for consistency
253
+ if run_id is None:
254
+ run_id = ""
255
+
256
+ # Check for retry run id first - this takes precedence
257
+ retry_run_id = os.environ.get(defaults.RETRY_RUN_ID, "")
258
+ if retry_run_id:
259
+ return retry_run_id
260
+
261
+ if not run_id:
262
+ run_id = os.environ.get(defaults.ENV_RUN_ID, "")
263
+
264
+ # If both are not given, generate one
265
+ if not run_id:
266
+ now = datetime.now()
267
+ run_id = f"{names.get_random_name()}-{now.hour:02}{now.minute:02}"
268
+
269
+ return run_id
270
+
271
+ @computed_field # type: ignore
272
+ @property
273
+ def retry_indicator(self) -> str:
274
+ """Indicator for retry executions to distinguish attempt logs."""
275
+ return os.environ.get(defaults.RETRY_INDICATOR, "")
276
+
277
+ @computed_field # type: ignore
278
+ @property
279
+ def is_retry(self) -> bool:
280
+ """Flag indicating if this is a retry run based on environment variable."""
281
+ retry_run_id = os.environ.get(defaults.RETRY_RUN_ID, "")
282
+ return bool(retry_run_id)
283
+
284
+ def model_post_init(self, __context: Any) -> None:
285
+ os.environ[defaults.ENV_RUN_ID] = self.run_id
286
+
287
+ if self.configuration_file:
288
+ os.environ[defaults.RUNNABLE_CONFIGURATION_FILE] = self.configuration_file
289
+ if self.tag:
290
+ os.environ[defaults.RUNNABLE_RUN_TAG] = self.tag
291
+
292
+ # Set the context using contextvars for proper isolation
293
+ set_run_context(self)
294
+
295
+ def execute(self):
296
+ "Execute the pipeline or the job"
297
+ raise NotImplementedError
298
+
299
+
300
+ class PipelineContext(RunnableContext):
301
+ pipeline_executor: InstantiatedPipelineExecutor
302
+ catalog: InstantiatedCatalog
303
+ secrets: InstantiatedSecrets
304
+ pickler: InstantiatedPickler
305
+ run_log_store: InstantiatedRunLogStore
306
+
307
+ pipeline_definition_file: str
308
+
309
+ @computed_field # type: ignore
310
+ @cached_property
311
+ def dag(self) -> Graph | None:
312
+ """Get the dag."""
313
+ if self.execution_mode == ExecutionMode.YAML:
314
+ return get_pipeline_spec_from_yaml(self.pipeline_definition_file)
315
+ elif self.execution_mode == ExecutionMode.PYTHON:
316
+ return get_pipeline_spec_from_python(self.pipeline_definition_file)
317
+ else:
318
+ raise ValueError(
319
+ f"Invalid execution mode: {self.execution_mode}. Must be 'yaml' or 'python'."
320
+ )
321
+
322
+ @computed_field # type: ignore
323
+ @cached_property
324
+ def dag_hash(self) -> str:
325
+ dag = self.dag
326
+ if not dag:
327
+ return ""
328
+ dag_str = json.dumps(dag.model_dump(), sort_keys=True, ensure_ascii=True)
329
+ return hashlib.sha1(dag_str.encode("utf-8")).hexdigest()
330
+
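The hash above is a plain SHA-1 over the JSON-serialised DAG, so it changes whenever the pipeline definition changes but is stable across key ordering. A stand-alone illustration of that property:

import hashlib
import json

a = json.dumps({"name": "demo", "steps": ["one", "two"]}, sort_keys=True, ensure_ascii=True)
b = json.dumps({"steps": ["one", "two"], "name": "demo"}, sort_keys=True, ensure_ascii=True)
# sort_keys makes the fingerprint independent of dict insertion order
assert hashlib.sha1(a.encode("utf-8")).hexdigest() == hashlib.sha1(b.encode("utf-8")).hexdigest()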
331
+ def get_node_callable_command(
332
+ self,
333
+ node: BaseNode,
334
+ iter_variable: defaults.IterableParameterModel | None = None,
335
+ over_write_run_id: str = "",
336
+ log_level: str = "",
337
+ ) -> str:
338
+ run_id = self.run_id
339
+
340
+ if over_write_run_id:
341
+ run_id = over_write_run_id
342
+
343
+ log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
344
+
345
+ action = (
346
+ f"runnable execute-single-node {run_id} "
347
+ f"{self.pipeline_definition_file} "
348
+ f"{node._command_friendly_name()} "
349
+ f"--log-level {log_level} "
350
+ )
351
+
352
+ # yaml is the default mode
353
+ if self.execution_mode == ExecutionMode.PYTHON:
354
+ action = action + "--mode python "
355
+
356
+ if iter_variable:
357
+ action = action + f"--iter-variable '{iter_variable.model_dump_json()}' "
358
+
359
+ if self.configuration_file:
360
+ action = action + f"--config {self.configuration_file} "
361
+
362
+ if self.parameters_file:
363
+ action = action + f"--parameters-file {self.parameters_file} "
364
+
365
+ if self.tag:
366
+ action = action + f"--tag {self.tag}"
367
+
368
+ console.log(
369
+ f"Generated command for node {node._command_friendly_name()}: {action}"
370
+ )
371
+
372
+ return action
373
+
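For orientation, the command assembled above is the per-node re-entry point used by container and cluster executors. A rough rendering for a hypothetical node and run id; every value below is illustrative, including how _command_friendly_name() formats the node name:

# Approximate output of get_node_callable_command for a python-mode pipeline
# with a config and parameters file, no iteration variable and no tag:
expected = (
    "runnable execute-single-node happy-turing-1015 pipeline.py step-one "
    "--log-level INFO --mode python "
    "--config config.yaml --parameters-file params.yaml "
)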
374
+ def get_fan_command(
375
+ self,
376
+ node: BaseNode,
377
+ mode: str,
378
+ run_id: str,
379
+ iter_variable: defaults.IterableParameterModel | None = None,
380
+ log_level: str = "",
381
+ ) -> str:
382
+ """
383
+ Return the fan "in or out" command for this pipeline context.
384
+
385
+ Args:
386
+ node (BaseNode): The composite node that we are fanning in or out
387
+ mode (str): "in" or "out"
388
+ iter_variable (IterableParameterModel, optional): The iteration variable when the node runs inside a map/loop. Defaults to None.
389
+ log_level (str, optional): Log level. Defaults to "".
390
+
391
+ Returns:
392
+ str: The fan in or out command
393
+ """
394
+ log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
395
+ action = (
396
+ f"runnable fan {run_id} "
397
+ f"{node._command_friendly_name()} "
398
+ f"{self.pipeline_definition_file} "
399
+ f"{mode} "
400
+ f"--log-level {log_level}"
401
+ )
402
+ if self.configuration_file:
403
+ action += f" --config-file {self.configuration_file}"
404
+ if self.parameters_file:
405
+ action += f" --parameters-file {self.parameters_file}"
406
+ if iter_variable:
407
+ action += f" --iter-variable '{iter_variable.model_dump_json()}'"
408
+ if self.execution_mode == ExecutionMode.PYTHON:
409
+ action += " --mode python"
410
+ if self.tag:
411
+ action += f" --tag {self.tag}"
412
+
413
+ console.log(
414
+ f"Generated command for fan {mode} for node {node._command_friendly_name()}: {action}"
415
+ )
416
+ return action
417
+
418
+ def execute(self):
419
+ assert self.dag is not None
420
+
421
+ pipeline_name = getattr(self.dag, "name", "unnamed")
422
+
423
+ with logfire.span(
424
+ "pipeline:{pipeline_name}",
425
+ pipeline_name=pipeline_name,
426
+ run_id=self.run_id,
427
+ executor=self.pipeline_executor.__class__.__name__,
428
+ ):
429
+ logfire.info("Pipeline execution started")
430
+
431
+ console.print("Working with context:")
432
+ console.print(get_run_context())
433
+ console.rule(style="[dark orange]")
434
+
435
+ # Prepare for graph execution
436
+ if self.pipeline_executor._should_setup_run_log_at_traversal:
437
+ self.pipeline_executor._set_up_run_log(exists_ok=False)
438
+
439
+ try:
440
+ self.pipeline_executor.execute_graph(dag=self.dag)
441
+ if not self.pipeline_executor._should_setup_run_log_at_traversal:
442
+ # non local executors just traverse the graph and do nothing
443
+ logfire.info("Pipeline submitted", status="submitted")
444
+ return {}
445
+
446
+ ctx = get_run_context()
447
+ assert ctx
448
+ assert isinstance(ctx, PipelineContext)
449
+ run_log = ctx.run_log_store.get_run_log_by_id(
450
+ run_id=ctx.run_id, full=False
451
+ )
452
+
453
+ if run_log.status == defaults.SUCCESS:
454
+ console.print(
455
+ "Pipeline executed successfully!", style=defaults.success_style
456
+ )
457
+ logfire.info("Pipeline completed", status="success")
458
+ else:
459
+ console.print(
460
+ "Pipeline execution failed.", style=defaults.error_style
461
+ )
462
+ logfire.error("Pipeline failed", status="failed")
463
+ raise exceptions.ExecutionFailedError(ctx.run_id)
464
+ except Exception as e:
465
+ console.print(e, style=defaults.error_style)
466
+ logfire.error("Pipeline failed with exception", error=str(e)[:256])
467
+ raise
468
+
469
+ if self.pipeline_executor._should_setup_run_log_at_traversal:
470
+ ctx = get_run_context()
471
+ assert ctx
472
+ assert isinstance(ctx, PipelineContext)
473
+ return ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id)
474
+
475
+ def _handle_completion(self):
476
+ """Handle post-execution - shared by sync/async."""
477
+ ctx = get_run_context()
478
+ assert ctx
479
+ assert isinstance(ctx, PipelineContext)
480
+ run_log = ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id, full=False)
481
+
482
+ if run_log.status == defaults.SUCCESS:
483
+ console.print(
484
+ "Pipeline executed successfully!", style=defaults.success_style
485
+ )
486
+ logfire.info("Pipeline completed", status="success")
487
+ else:
488
+ console.print("Pipeline execution failed.", style=defaults.error_style)
489
+ logfire.error("Pipeline failed", status="failed")
490
+ raise exceptions.ExecutionFailedError(ctx.run_id)
491
+
492
+ async def execute_async(self):
493
+ """Async pipeline execution."""
494
+ assert self.dag is not None
495
+
496
+ pipeline_name = getattr(self.dag, "name", "unnamed")
497
+
498
+ with logfire.span(
499
+ "pipeline:{pipeline_name}",
500
+ pipeline_name=pipeline_name,
501
+ run_id=self.run_id,
502
+ executor=self.pipeline_executor.__class__.__name__,
503
+ ):
504
+ logfire.info("Async pipeline execution started")
505
+
506
+ console.print("Working with context:")
507
+ console.print(get_run_context())
508
+ console.rule(style="[dark orange]")
509
+
510
+ if self.pipeline_executor._should_setup_run_log_at_traversal:
511
+ self.pipeline_executor._set_up_run_log(exists_ok=False)
512
+
513
+ try:
514
+ await self.pipeline_executor.execute_graph_async(dag=self.dag)
515
+ self._handle_completion()
516
+
517
+ except Exception as e:
518
+ console.print(e, style=defaults.error_style)
519
+ logfire.error("Pipeline failed with exception", error=str(e)[:256])
520
+ raise
521
+
522
+ if self.pipeline_executor._should_setup_run_log_at_traversal:
523
+ ctx = get_run_context()
524
+ assert ctx
525
+ assert isinstance(ctx, PipelineContext)
526
+ return ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id)
527
+
528
+
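Taken together, a pipeline run is wired up by feeding the resolved services into a PipelineContext and calling execute(). A hedged end-to-end sketch; the pipeline file name is hypothetical and the default service entries must resolve to installed extensions for this to actually run:

from runnable.context import PipelineContext, ServiceConfigurations

services = ServiceConfigurations(execution_context="pipeline").services
ctx = PipelineContext(
    **services,                               # pipeline_executor, catalog, secrets, pickler, run_log_store
    run_id="",                                # empty -> a name-plus-timestamp id is generated
    pipeline_definition_file="pipeline.py",   # hypothetical module exposing the SDK pipeline
    execution_mode="python",
)
ctx.execute()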
529
+ class AsyncPipelineContext(RunnableContext):
530
+ """
531
+ Simplified context for async pipeline execution.
532
+
533
+ Unlike PipelineContext, this accepts the DAG directly rather than
534
+ introspecting from a file. This simplifies async execution since
535
+ we only support local executors for async pipelines.
536
+ """
537
+
538
+ pipeline_executor: InstantiatedPipelineExecutor
539
+ catalog: InstantiatedCatalog
540
+ secrets: InstantiatedSecrets
541
+ pickler: InstantiatedPickler
542
+ run_log_store: InstantiatedRunLogStore
543
+
544
+ # DAG is passed directly, not computed from a file
545
+ dag: Graph
546
+
547
+ @computed_field # type: ignore
548
+ @cached_property
549
+ def dag_hash(self) -> str:
550
+ dag = self.dag
551
+ if not dag:
552
+ return ""
553
+ dag_str = json.dumps(dag.model_dump(), sort_keys=True, ensure_ascii=True)
554
+ return hashlib.sha1(dag_str.encode("utf-8")).hexdigest()
555
+
556
+ async def execute_async(self):
557
+ """Async pipeline execution."""
558
+ assert self.dag is not None
559
+
560
+ pipeline_name = getattr(self.dag, "name", "unnamed")
561
+
562
+ with logfire.span(
563
+ "pipeline:{pipeline_name}",
564
+ pipeline_name=pipeline_name,
565
+ run_id=self.run_id,
566
+ executor=self.pipeline_executor.__class__.__name__,
567
+ ):
568
+ logfire.info("Async pipeline execution started")
569
+
570
+ console.print("Working with context:")
571
+ console.print(get_run_context())
572
+ console.rule(style="[dark orange]")
573
+
574
+ if self.pipeline_executor._should_setup_run_log_at_traversal:
575
+ self.pipeline_executor._set_up_run_log(exists_ok=False)
576
+
577
+ try:
578
+ await self.pipeline_executor.execute_graph_async(dag=self.dag)
579
+
580
+ run_log = self.run_log_store.get_run_log_by_id(
581
+ run_id=self.run_id, full=False
582
+ )
583
+
584
+ if run_log.status == defaults.SUCCESS:
585
+ console.print(
586
+ "Pipeline executed successfully!", style=defaults.success_style
587
+ )
588
+ logfire.info("Pipeline completed", status="success")
589
+ else:
590
+ console.print(
591
+ "Pipeline execution failed.", style=defaults.error_style
592
+ )
593
+ logfire.error("Pipeline failed", status="failed")
594
+ raise exceptions.ExecutionFailedError(self.run_id)
595
+
596
+ except Exception as e:
597
+ console.print(e, style=defaults.error_style)
598
+ logfire.error("Pipeline failed with exception", error=str(e)[:256])
599
+ raise
600
+
601
+ if self.pipeline_executor._should_setup_run_log_at_traversal:
602
+ return self.run_log_store.get_run_log_by_id(run_id=self.run_id)
603
+
604
+
605
+ class JobContext(RunnableContext):
606
+ job_executor: InstantiatedJobExecutor
607
+ catalog: InstantiatedCatalog
608
+ secrets: InstantiatedSecrets
609
+ pickler: InstantiatedPickler
610
+ run_log_store: InstantiatedRunLogStore
611
+
612
+ job_definition_file: str
613
+ catalog_store_copy: bool = Field(default=True, alias="catalog_store_copy")
614
+
615
+ @computed_field # type: ignore
616
+ @cached_property
617
+ def job(self) -> BaseTaskType:
618
+ job, _ = get_job_spec_from_python(self.job_definition_file)
619
+ return job
620
+
621
+ @computed_field # type: ignore
622
+ @cached_property
623
+ def catalog_settings(self) -> list[str] | None:
624
+ _, catalog_config = get_job_spec_from_python(self.job_definition_file)
625
+ return catalog_config
626
+
627
+ def get_job_callable_command(
628
+ self,
629
+ over_write_run_id: str = "",
630
+ ):
631
+ run_id = self.run_id
632
+
633
+ if over_write_run_id:
634
+ run_id = over_write_run_id
635
+
636
+ log_level = logging.getLevelName(logger.getEffectiveLevel())
637
+
638
+ action = (
639
+ f"runnable execute-job {self.job_definition_file} {run_id} "
640
+ f" --log-level {log_level}"
641
+ )
642
+
643
+ if self.configuration_file:
644
+ action = action + f" --config {self.configuration_file}"
645
+
646
+ if self.parameters_file:
647
+ action = action + f" --parameters {self.parameters_file}"
648
+
649
+ if self.tag:
650
+ action = action + f" --tag {self.tag}"
651
+
652
+ return action
653
+
654
+ def execute(self):
655
+ with logfire.span(
656
+ "job:{job_name}",
657
+ job_name=self.job_definition_file,
658
+ run_id=self.run_id,
659
+ executor=self.job_executor.__class__.__name__,
660
+ ):
661
+ logfire.info("Job execution started")
662
+
663
+ console.print("Working with context:")
664
+ console.print(get_run_context())
665
+ console.rule(style="[dark orange]")
666
+
667
+ try:
668
+ self.job_executor.submit_job(
669
+ job=self.job, catalog_settings=self.catalog_settings
670
+ )
671
+ logfire.info("Job submitted", status="submitted")
672
+ except Exception as e:
673
+ logfire.error("Job failed", error=str(e)[:256])
674
+ raise
675
+ finally:
676
+ console.print(f"Job execution completed for run id: {self.run_id}")
677
+
678
+ logger.info(
679
+ "Executing the job from the user. We are still in the caller's compute"
680
+ " environment"
681
+ )
682
+
683
+ if self.job_executor._should_setup_run_log_at_traversal:
684
+ ctx = get_run_context()
685
+ assert ctx
686
+ assert isinstance(ctx, JobContext)
687
+ return ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id)
688
+
689
+
690
+ # Context variable for thread/async-safe run context storage
691
+ if TYPE_CHECKING:
692
+ from typing import Union
693
+
694
+ RunnableContextType = Union[
695
+ "RunnableContext", "PipelineContext", "JobContext", "AsyncPipelineContext"
696
+ ]
697
+ else:
698
+ RunnableContextType = Any
699
+
700
+ _run_context_var: contextvars.ContextVar[
701
+ Optional["PipelineContext | JobContext | AsyncPipelineContext | RunnableContext"]
702
+ ] = contextvars.ContextVar("run_context", default=None)
703
+
704
+
705
+ def get_run_context() -> (
706
+ Optional["PipelineContext | JobContext | AsyncPipelineContext | RunnableContext"]
707
+ ):
708
+ """Get the current run context for this execution context."""
709
+ return _run_context_var.get()
710
+
711
+
712
+ def set_run_context(
713
+ context: Optional[
714
+ "PipelineContext | JobContext | AsyncPipelineContext | RunnableContext"
715
+ ],
716
+ ) -> None:
717
+ """Set the run context for this execution context."""
718
+ _run_context_var.set(context)
719
+
720
+
721
+ # BREAKING CHANGE: The global run_context variable has been replaced with
722
+ # get_run_context() and set_run_context() functions for proper context isolation.
723
+ # All code must be updated to use the new API.
724
+ #
725
+ # Migration guide:
726
+ # Before: run_context.run_log_store
727
+ # After: get_run_context().run_log_store
728
+ #
729
+ # This change was necessary to fix concurrency issues by using contextvars
730
+ # for proper thread and async isolation of run contexts.
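To make the migration note concrete, the sketch below shows the intended call pattern and why contextvars keeps concurrent runs isolated; the objects passed in are stand-ins for real PipelineContext/JobContext instances:

import asyncio

from runnable.context import get_run_context, set_run_context


async def worker(ctx):
    set_run_context(ctx)             # replaces the old global `run_context = ...`
    await asyncio.sleep(0)           # yield to the other task
    assert get_run_context() is ctx  # each task still sees only its own context


async def main():
    # Each asyncio task runs in a copy of the current Context, so the two
    # set_run_context calls do not clobber each other.
    await asyncio.gather(worker(object()), worker(object()))


asyncio.run(main())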