dagster-async-executor 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_async_executor/__init__.py +9 -0
- dagster_async_executor/executor.py +316 -0
- dagster_async_executor/executor_definition.py +53 -0
- dagster_async_executor-0.0.1.dist-info/METADATA +180 -0
- dagster_async_executor-0.0.1.dist-info/RECORD +7 -0
- dagster_async_executor-0.0.1.dist-info/WHEEL +5 -0
- dagster_async_executor-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from dagster._core.libraries import DagsterLibraryRegistry
|
|
2
|
+
|
|
3
|
+
from dagster_async_executor.executor_definition import async_executor as async_executor
|
|
4
|
+
|
|
5
|
+
# Version string of this package; passed to the library registry call below.
__version__ = "0.0.1"

# Register this package and its version with Dagster's library registry,
# flagged via ``is_dagster_package=False``.
DagsterLibraryRegistry.register(
    "dagster-async-executor",
    __version__,
    is_dagster_package=False,
)
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import queue
|
|
3
|
+
import sys
|
|
4
|
+
from collections.abc import AsyncIterator, Iterator
|
|
5
|
+
from contextlib import ExitStack
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Optional, ParamSpec, TypeVar
|
|
8
|
+
|
|
9
|
+
import anyio
|
|
10
|
+
import anyio.abc
|
|
11
|
+
from anyio.from_thread import start_blocking_portal
|
|
12
|
+
from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
|
|
13
|
+
from dagster_shared.utils.timing import format_duration
|
|
14
|
+
|
|
15
|
+
import dagster._check as check
|
|
16
|
+
from dagster._core.events import DagsterEvent, EngineEventData
|
|
17
|
+
from dagster._core.execution.api import ExecuteRunWithPlanIterable
|
|
18
|
+
from dagster._core.execution.compute_logs import create_compute_log_file_key
|
|
19
|
+
from dagster._core.execution.context.system import (
|
|
20
|
+
PlanExecutionContext,
|
|
21
|
+
PlanOrchestrationContext,
|
|
22
|
+
)
|
|
23
|
+
from dagster._core.execution.context_creation_job import PlanExecutionContextManager
|
|
24
|
+
from dagster._core.execution.plan.active import ActiveExecution
|
|
25
|
+
from dagster_async_executor.execution.plan.execute_plan import (
|
|
26
|
+
_trigger_hook,
|
|
27
|
+
dagster_event_sequence_for_step,
|
|
28
|
+
)
|
|
29
|
+
from dagster_async_executor.execution.plan.execute_step import _verify_if_complete
|
|
30
|
+
from dagster._core.execution.plan.execute_plan import _handle_compute_log_setup_error
|
|
31
|
+
from dagster._core.execution.plan.instance_concurrency_context import (
|
|
32
|
+
InstanceConcurrencyContext,
|
|
33
|
+
)
|
|
34
|
+
from dagster._core.execution.plan.objects import step_failure_event_from_exc_info
|
|
35
|
+
from dagster._core.execution.plan.plan import ExecutionPlan
|
|
36
|
+
from dagster._core.execution.plan.state import KnownExecutionState
|
|
37
|
+
from dagster._core.execution.plan.step import ExecutionStep
|
|
38
|
+
from dagster._core.execution.retries import RetryMode
|
|
39
|
+
from dagster._core.execution.step_dependency_config import StepDependencyConfig
|
|
40
|
+
from dagster._core.executor.base import Executor
|
|
41
|
+
from dagster._utils.timing import time_execution_scope
|
|
42
|
+
|
|
43
|
+
T = TypeVar("T")
|
|
44
|
+
P = ParamSpec("P")
|
|
45
|
+
|
|
46
|
+
_SENTINEL = object()
|
|
47
|
+
_SYNC_ASYNC_BRIDGE_QUEUE_MAXSIZE = 0
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class _ErrorWrapper:
    """Envelope used to carry an exception across the sync/async bridge queue.

    The async producer wraps whatever it caught in this class so the
    synchronous consumer can tell an error apart from a regular event and
    re-raise it.
    """

    # The exception instance captured on the producer side.
    exc: BaseException
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class AsyncExecutor(Executor):
    """Executor that runs plan steps as async tasks behind a synchronous iterator.

    Steps are scheduled inside an anyio task group and report their
    ``DagsterEvent`` objects through a memory object stream.  A blocking portal
    plus a ``queue.Queue`` carry those events back to the synchronous
    ``execute`` generator.
    """

    def __init__(
        self,
        retries: RetryMode,
        max_concurrent: Optional[int] = None,
        tag_concurrency_limits: Optional[list[dict[str, Any]]] = None,
        step_dependency_config: StepDependencyConfig = StepDependencyConfig.default(),
    ):
        """Validate and store the executor configuration.

        Args:
            retries: Retry mode handed to ``ExecutionPlan.start``.
            max_concurrent: Optional concurrency limit handed to
                ``ExecutionPlan.start``.
            tag_concurrency_limits: Optional tag-based limits handed to
                ``ExecutionPlan.start``.
            step_dependency_config: Step dependency configuration used for both
                the execution context manager and ``ExecutionPlan.start``.
        """
        self._retries = check.inst_param(retries, "retries", RetryMode)
        self._step_dependency_config = check.inst_param(
            step_dependency_config, "step_dependency_config", StepDependencyConfig
        )
        self._max_concurrent = check.opt_int_param(max_concurrent, "max_concurrent")
        self._tag_concurrency_limits = check.opt_list_param(
            tag_concurrency_limits, "tag_concurrency_limits"
        )

    @property
    def retries(self) -> RetryMode:
        """The retry mode this executor was constructed with."""
        return self._retries

    @property
    def step_dependency_config(self) -> StepDependencyConfig:
        """The step dependency configuration this executor was constructed with."""
        return self._step_dependency_config

    def execute(
        self,
        plan_context: PlanOrchestrationContext,
        execution_plan: ExecutionPlan,
    ) -> Iterator[DagsterEvent]:
        """Synchronous entrypoint.

        Uses ExecuteRunWithPlanIterable to get a PlanExecutionContext, but the
        actual scheduling logic is defined by this executor: the iterator passed
        in is ``_execution_iterator_wrapper``, which in turn drives
        ``_async_execution_iterator``.

        Args:
            plan_context: Orchestration context for the run.
            execution_plan: The plan whose steps should be executed.

        Yields:
            An engine event announcing the start, every event produced by the
            plan execution, and a final engine event reporting the duration.
        """
        check.inst_param(plan_context, "plan_context", PlanOrchestrationContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        step_keys_to_execute = execution_plan.step_keys_to_execute

        yield DagsterEvent.engine_event(
            plan_context,
            "Executing steps with AsyncExecutor",
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_keys_to_execute
            ),
        )

        with time_execution_scope() as timer_result:
            yield from ExecuteRunWithPlanIterable(
                execution_plan=execution_plan,
                iterator=self._execution_iterator_wrapper,
                execution_context_manager=PlanExecutionContextManager(
                    job=plan_context.job,
                    retry_mode=plan_context.retry_mode,
                    execution_plan=execution_plan,
                    run_config=plan_context.run_config,
                    dagster_run=plan_context.dagster_run,
                    instance=plan_context.instance,
                    raise_on_error=plan_context.raise_on_error,
                    output_capture=plan_context.output_capture,
                    step_dependency_config=self.step_dependency_config,
                ),
            )

        yield DagsterEvent.engine_event(
            plan_context,
            f"Finished AsyncExecutor in {format_duration(timer_result.millis)}",
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_keys_to_execute
            ),
        )

    def _execution_iterator_wrapper(
        self,
        job_context: PlanExecutionContext,
        execution_plan: ExecutionPlan,
    ) -> Iterator[DagsterEvent]:
        """Synchronous wrapper around the async execution iterator.

        Uses an anyio BlockingPortal and a queue to bridge between the async and
        sync worlds. This allows the executor to present a synchronous iterator
        interface while leveraging async execution under the hood, avoiding
        issues with things like compute log capture that are not inherently
        async-aware or thread-safe.

        Args:
            job_context: Execution context for the run.
            execution_plan: The plan to start and execute.

        Yields:
            The compute-log capture event (or the events describing a compute
            log setup failure), followed by every event produced by
            ``_async_execution_iterator``.

        Raises:
            BaseException: Whatever the async producer raised is re-raised here.
        """
        event_queue: queue.Queue[DagsterEvent | _ErrorWrapper | object] = queue.Queue(
            maxsize=_SYNC_ASYNC_BRIDGE_QUEUE_MAXSIZE
        )

        compute_log_manager = job_context.instance.compute_log_manager
        step_keys = [s.key for s in execution_plan.get_steps_to_execute_in_topo_order()]
        file_key = create_compute_log_file_key()
        log_key = compute_log_manager.build_log_key_for_run(
            job_context.run_id, file_key
        )

        with (
            InstanceConcurrencyContext(
                job_context.instance, job_context.dagster_run
            ) as instance_concurrency_context,
            execution_plan.start(
                retry_mode=self.retries,
                max_concurrent=self._max_concurrent,
                tag_concurrency_limits=self._tag_concurrency_limits,
                instance_concurrency_context=instance_concurrency_context,
                step_dependency_config=self.step_dependency_config,
            ) as active,
            ExitStack() as capture_stack,
        ):
            # 1) Compute logs (still sync).  The capture context is entered on
            # the ExitStack so it stays open for the rest of this ``with`` body.
            try:
                log_context = capture_stack.enter_context(
                    compute_log_manager.capture_logs(log_key)
                )
                yield DagsterEvent.capture_logs(
                    job_context, step_keys, log_key, log_context
                )
            except Exception:
                yield from _handle_compute_log_setup_error(job_context, sys.exc_info())

            # 2) Define async to sync bridge
            async def _async_iterator_to_queue() -> None:
                """Pump async events into the queue; always finish with the sentinel."""
                try:
                    async for i in self._async_execution_iterator(job_context, active):
                        # NOTE(review): ``put`` is a blocking call; it does not
                        # block while the queue is unbounded (maxsize 0).
                        event_queue.put(i)
                except BaseException as e:
                    # Hand the error to the consumer instead of raising here.
                    event_queue.put(_ErrorWrapper(e))
                finally:
                    event_queue.put(_SENTINEL)

            # 3) Start the blocking portal and consume from the queue
            with start_blocking_portal() as portal:
                task = portal.start_task_soon(_async_iterator_to_queue)
                try:
                    while True:
                        item = event_queue.get()
                        if item is _SENTINEL:
                            break
                        if isinstance(item, _ErrorWrapper):
                            # The handler below cancels the task before the
                            # error propagates, so no extra cancel is needed.
                            raise item.exc
                        assert isinstance(
                            item, DagsterEvent
                        )  # after narrowing, item must be DagsterEvent
                        yield item
                except BaseException:
                    # Covers producer errors as well as the consumer abandoning
                    # this generator: best-effort cancel, then propagate.
                    try:
                        task.cancel()
                    except Exception:
                        pass
                    raise

    async def _async_execution_iterator(
        self,
        job_context: PlanExecutionContext,
        active: ActiveExecution,
    ) -> AsyncIterator[DagsterEvent]:
        """Async orchestrator that runs steps in a TaskGroup and yields events.

        - Uses a single anyio memory object stream as the central event bus.
        - Each step worker receives a clone of send_stream and writes its events there.
        - We update ActiveExecution based on events and schedule new work as usual.

        Args:
            job_context: Execution context for the run.
            active: The active execution tracking step state.

        Yields:
            Concurrency events, step events and plan events in the order they
            are observed.
        """
        send_stream, recv_stream = anyio.create_memory_object_stream[DagsterEvent]()

        async with recv_stream, anyio.create_task_group() as tg:
            try:
                while not active.is_complete:
                    steps_to_execute = active.get_steps_to_execute(limit=None)

                    for event in active.concurrency_event_iterator(job_context):
                        yield event

                    known_state = active.get_known_state()
                    for step in steps_to_execute:
                        tg.start_soon(
                            self._run_step_worker,
                            job_context,
                            known_state,
                            step,
                            send_stream.clone(),
                        )

                    # anyio.EndOfStream is deliberately not caught here; it
                    # propagates to the caller unchanged.
                    event = await recv_stream.receive()

                    yield event
                    active.handle_event(event)
                    await _verify_if_complete(job_context, active, event)

                    for plan_event in active.plan_events_iterator(job_context):
                        yield plan_event
            finally:
                # Close the original send stream; only the worker clones remain.
                await send_stream.aclose()

            async for event in self._drain_events(active, job_context, recv_stream):
                yield event

    async def _run_step_worker(
        self,
        job_context: PlanExecutionContext,
        known_state: KnownExecutionState,
        step: ExecutionStep,
        send_stream: MemoryObjectSendStream[DagsterEvent],
    ) -> None:
        """Run a single step's async compute, emitting DagsterEvents via send_stream.

        Args:
            job_context: Execution context for the run.
            known_state: Known execution state used to build the step context.
            step: The step to execute.
            send_stream: This worker's clone of the event stream; it is closed
                when the worker exits.

        Raises:
            BaseException: Any error raised while running the step is re-raised
                after an attempt to emit a step failure event.
        """
        # Enter the stream context before any other work so this clone is
        # closed on every exit path, including a failure while building the
        # step context or validating resources.
        async with send_stream:
            step_context = job_context.for_step(step, known_state)
            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                (
                    f"Expected step context for op {step_context.op.name} to have all required "
                    f"resources, but missing {missing_resources}."
                ),
            )
            try:
                step_events: list[DagsterEvent] = []
                async for event in dagster_event_sequence_for_step(step_context):
                    await send_stream.send(event)
                    step_events.append(event)

                # Hooks receive every event the step produced.
                async for hook_event in _trigger_hook(step_context, step_events):
                    await send_stream.send(hook_event)
            except BaseException:
                failure_event = step_failure_event_from_exc_info(
                    step_context,
                    sys.exc_info(),
                )
                try:
                    await send_stream.send(failure_event)
                finally:
                    # Always propagate, whether or not the failure event could
                    # be delivered.
                    raise

    async def _drain_events(
        self,
        active: ActiveExecution,
        job_context: PlanExecutionContext,
        recv_stream: MemoryObjectReceiveStream[DagsterEvent],
    ) -> AsyncIterator[DagsterEvent]:
        """Drain any remaining events from recv_stream after all steps complete.

        Args:
            active: The active execution to update with each event.
            job_context: Execution context for the run.
            recv_stream: Receiving end of the event stream.

        Yields:
            Each remaining event, followed by any plan events it triggers.
        """
        # Iterating the receive stream stops once it reports end-of-stream.
        async for event in recv_stream:
            yield event

            active.handle_event(event)
            await _verify_if_complete(job_context, active, event)

            for plan_event in active.plan_events_iterator(job_context):
                yield plan_event
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from dagster._core.definitions.executor_definition import ExecutorConfig, executor
|
|
4
|
+
from dagster_shared import check
|
|
5
|
+
from dagster._builtins import Int
|
|
6
|
+
from dagster._config import Field, Noneable
|
|
7
|
+
from dagster._core.execution.retries import RetryMode, get_retries_config
|
|
8
|
+
from dagster._core.execution.step_dependency_config import (
|
|
9
|
+
StepDependencyConfig,
|
|
10
|
+
get_step_dependency_config_field,
|
|
11
|
+
)
|
|
12
|
+
from dagster._core.execution.tags import get_tag_concurrency_limits_config
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from dagster_async_executor.executor import AsyncExecutor
|
|
16
|
+
|
|
17
|
+
# Config schema for the async executor: an optional concurrency cap plus the
# tag-concurrency, retry and step-dependency fields built by Dagster's helpers.
ASYNC_CONFIG = Field(
    dict(
        max_concurrent=Field(
            Noneable(Int),
            default_value=None,
            description=(
                "The number of asynchronous tasks that may run concurrently. "
                "By default, this is set to None, "
                "which allows for an unlimited number of concurrent tasks."
            ),
        ),
        tag_concurrency_limits=get_tag_concurrency_limits_config(),
        retries=get_retries_config(),
        step_dependency_config=get_step_dependency_config_field(),
    ),
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@executor(name="async_executor", config_schema=ASYNC_CONFIG)
def async_executor(init_context):
    """Create the async executor from the executor config on ``init_context``."""
    executor_config = init_context.executor_config
    return _core_async_executor_creation(executor_config)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _core_async_executor_creation(config: ExecutorConfig) -> "AsyncExecutor":
    """Build an ``AsyncExecutor`` from a resolved executor config mapping.

    Args:
        config: Executor config with the ``retries``, ``max_concurrent``,
            ``tag_concurrency_limits`` and ``step_dependency_config`` entries.

    Returns:
        The configured ``AsyncExecutor``.
    """
    # Imported here rather than at module level; the top-level import is only
    # performed under TYPE_CHECKING.
    from dagster_async_executor.executor import AsyncExecutor

    retry_mode = RetryMode.from_config(check.dict_elem(config, "retries"))  # type: ignore # (possible none)
    concurrency_cap = check.opt_int_elem(config, "max_concurrent")
    tag_limits = check.opt_list_elem(config, "tag_concurrency_limits")
    dependency_config = StepDependencyConfig.from_config(
        check.opt_nullable_dict_elem(config, "step_dependency_config")
    )

    return AsyncExecutor(
        retries=retry_mode,
        max_concurrent=concurrency_cap,
        tag_concurrency_limits=tag_limits,
        step_dependency_config=dependency_config,
    )
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dagster-async-executor
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Async Support for Dagster
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: anyio>=4.12.0
|
|
8
|
+
Requires-Dist: dagster>=1.12.2
|
|
9
|
+
|
|
10
|
+
# dagster-async-executor
|
|
11
|
+
|
|
12
|
+
An executor for [Dagster](https://github.com/dagster-io/dagster) that adds **native `asyncio` support** for ops and assets.
|
|
13
|
+
|
|
14
|
+
`dagster-async-executor` lets you:
|
|
15
|
+
|
|
16
|
+
- Run `async def` ops without manually managing event loops or thread pools.
|
|
17
|
+
- Mix sync and async ops in the same job.
|
|
18
|
+
- Use dynamic / fan-out graphs with async upstream and downstream dependencies.
|
|
19
|
+
- Keep the familiar Dagster executor interface, while enabling more scalable, concurrency‑friendly workloads (e.g. I/O‑heavy tasks, service calls, and streaming patterns).
|
|
20
|
+
|
|
21
|
+
This integration is a community‑maintained port of the original core PR: [dagster-io/dagster#32833](https://github.com/dagster-io/dagster/pull/32833).
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install dagster-async-executor
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Quickstart
|
|
34
|
+
|
|
35
|
+
Use the `async_executor` when defining your job and write your ops as `async def`:
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import anyio
|
|
39
|
+
import dagster as dg
|
|
40
|
+
from dagster_async_executor import async_executor
|
|
41
|
+
|
|
42
|
+
NUM_FANOUTS = 300
|
|
43
|
+
SLEEP_SECONDS = 3
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dg.op(out=dg.DynamicOut())
|
|
47
|
+
async def create_dynamic_outputs():
|
|
48
|
+
"""Creates a dynamic number of outputs."""
|
|
49
|
+
for i in range(NUM_FANOUTS):
|
|
50
|
+
yield dg.DynamicOutput(value=f"item_{i}", mapping_key=f"key_{i}")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dg.op
|
|
54
|
+
async def process_item(context: dg.OpExecutionContext, item: str):
|
|
55
|
+
"""Process each item from the fan-out."""
|
|
56
|
+
context.log.info(f"[{context.op_handle}] sleeping...")
|
|
57
|
+
await anyio.sleep(SLEEP_SECONDS)
|
|
58
|
+
context.log.info(f"[{context.op_handle}] completed")
|
|
59
|
+
return item
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dg.op
|
|
63
|
+
async def collect_results(context: dg.OpExecutionContext, results: list):
|
|
64
|
+
"""Collect all results from the fan-out."""
|
|
65
|
+
context.log.info(f"[{context.op_handle}] collected {len(results)} results")
|
|
66
|
+
return results
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dg.job(executor_def=async_executor)
|
|
70
|
+
def simple_fanout_job():
|
|
71
|
+
# no need to use await
|
|
72
|
+
dynamic_items = create_dynamic_outputs()
|
|
73
|
+
processed = dynamic_items.map(process_item)
|
|
74
|
+
collect_results(processed.collect())
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Run the job as usual (e.g. via Dagit, `dagster job execute`, or your orchestration environment). From the outside, this executor behaves like a standard Dagster executor – but internally it uses async orchestration.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## How it works
|
|
82
|
+
|
|
83
|
+
At a high level, `dagster-async-executor` introduces an `AsyncExecutor` that:
|
|
84
|
+
|
|
85
|
+
- Reuses Dagster’s **existing execution plan** machinery.
|
|
86
|
+
- Runs steps in an **async orchestration loop** backed by an `anyio.TaskGroup`.
|
|
87
|
+
- Bridges between **async step execution** and Dagster’s **synchronous executor interface** via a queue‑based sync–async bridge.
|
|
88
|
+
|
|
89
|
+
### Execution model
|
|
90
|
+
|
|
91
|
+
Conceptually, execution looks like this:
|
|
92
|
+
|
|
93
|
+
1. **Plan creation (sync)**
|
|
94
|
+
The run, plan, and context are created synchronously, just like with `in_process` and other executors.
|
|
95
|
+
|
|
96
|
+
2. **Async orchestration loop**
|
|
97
|
+
Once the plan is ready, an async orchestrator:
|
|
98
|
+
- Schedules each ready step as an async task.
|
|
99
|
+
- Uses `dagster_event_sequence_for_step` to obtain an **async sequence of `DagsterEvent`s** for each step.
|
|
100
|
+
- Sends those events through async streams.
|
|
101
|
+
|
|
102
|
+
3. **Sync–async event bridge**
|
|
103
|
+
A synchronous wrapper:
|
|
104
|
+
- Starts the async orchestrator inside an `anyio` `BlockingPortal`.
|
|
105
|
+
- Streams `DagsterEvent`s into a `queue.Queue`.
|
|
106
|
+
- Exposes a standard **`Iterator[DagsterEvent]`** to Dagster’s core execution machinery.
|
|
107
|
+
|
|
108
|
+
From the rest of the system’s perspective, this executor still “looks like” a normal synchronous executor, which keeps:
|
|
109
|
+
|
|
110
|
+
- Resource initialization behavior consistent.
|
|
111
|
+
- Logging and event semantics unchanged.
|
|
112
|
+
- Compatibility with existing Dagster entrypoints and tooling.
|
|
113
|
+
|
|
114
|
+
### Sync + async ops
|
|
115
|
+
|
|
116
|
+
Per-step behavior:
|
|
117
|
+
|
|
118
|
+
- Async orchestration drives all steps.
|
|
119
|
+
- Each step:
|
|
120
|
+
- Builds a `step_context`.
|
|
121
|
+
- Iterates over `dagster_event_sequence_for_step(step_context)` in an `async for` loop.
|
|
122
|
+
- Sends each `DagsterEvent` back through the async stream → queue → iterator bridge.
|
|
123
|
+
|
|
124
|
+
Because the core execution semantics are reused, you can mix sync and async ops in the same graph:
|
|
125
|
+
|
|
126
|
+
- Async upstream → sync downstream
|
|
127
|
+
- Sync upstream → async downstream
|
|
128
|
+
- Dynamic outputs and mapped steps that interleave async work
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Performance
|
|
133
|
+
|
|
134
|
+
The executor is designed for I/O‑bound and highly concurrent workloads. An initial performance test (`test_async_executor_performance.py::test_async_performance_basic`) shows improved scaling with increased parallelism.
|
|
135
|
+
|
|
136
|
+
Example results (fan‑out of async ops sleeping for 3 seconds):
|
|
137
|
+
|
|
138
|
+
| Number of ops | Sleep (seconds) | Job duration (seconds) |
|
|
139
|
+
|--------------:|----------------:|------------------------:|
|
|
140
|
+
| 1 | 3 | 3.25 |
|
|
141
|
+
| 5 | 3 | 3.51 |
|
|
142
|
+
| 20 | 3 | 4.23 |
|
|
143
|
+
| 100 | 3 | 6.86 |
|
|
144
|
+
| 300 | 3 | 14.17 |
|
|
145
|
+
|
|
146
|
+
These numbers are illustrative; real‑world performance depends on your environment, I/O characteristics, and concurrency limits.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Testing & behavior guarantees
|
|
151
|
+
|
|
152
|
+
The test suite focuses on validating behavior across a representative set of job shapes:
|
|
153
|
+
|
|
154
|
+
- **Basic async jobs**
|
|
155
|
+
- Single async op producing a simple output.
|
|
156
|
+
- Multiple async ops with dependencies and parallelism where possible.
|
|
157
|
+
|
|
158
|
+
- **Mixed sync/async graphs**
|
|
159
|
+
- Async upstream feeding into sync downstream.
|
|
160
|
+
- Sync upstream feeding into async downstream.
|
|
161
|
+
- Ensuring consistent materializations, events, and success/failure semantics across both kinds of ops.
|
|
162
|
+
|
|
163
|
+
- **Dynamic / fan‑out graphs**
|
|
164
|
+
- Async producers yielding dynamic outputs.
|
|
165
|
+
- Downstream mapping over dynamic keys.
|
|
166
|
+
- Interleaving async mapped steps and verifying all mapped outputs are awaited and collected correctly.
|
|
167
|
+
|
|
168
|
+
- **Error handling**
|
|
169
|
+
- Exceptions raised from async ops (including inside dynamic maps).
|
|
170
|
+
- Failures reported on the correct steps.
|
|
171
|
+
- Downstream steps cancelled or skipped according to normal Dagster rules.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Limitations & notes
|
|
176
|
+
|
|
177
|
+
- This executor has not been tested in Python 3.14's free-threaded mode.
|
|
178
|
+
- The executor targets **I/O‑bound** concurrency; CPU‑bound workloads should still be offloaded to processes or threads.
|
|
179
|
+
- Cancellation, backpressure, and resource lifetime semantics follow Dagster’s existing execution model, but async nuances may still evolve.
|
|
180
|
+
- This is a **community‑maintained** integration; behavior may change more rapidly than core Dagster executors as we iterate.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
dagster_async_executor/__init__.py,sha256=vC3BtUjz0Mu8jQNsr7O6bulzLAUA7T2x6I14QoI1ebA,275
|
|
2
|
+
dagster_async_executor/executor.py,sha256=f898L65a65hgyMzpEQo0joaEc3jCvPE6a9PgX0DU0L8,12641
|
|
3
|
+
dagster_async_executor/executor_definition.py,sha256=AEElqwpAlyDBa2GYOG28Vrwbc-v9-CJl4okHRZVE07w,1926
|
|
4
|
+
dagster_async_executor-0.0.1.dist-info/METADATA,sha256=UnfNGRE8L_IzVr40CqmCZHmSAyFT843TtO_cB4j_5x8,6727
|
|
5
|
+
dagster_async_executor-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
dagster_async_executor-0.0.1.dist-info/top_level.txt,sha256=zc6K7GEfOGGmdYjRlAfpGPjFpVSmUV7_IIguUpslAbc,23
|
|
7
|
+
dagster_async_executor-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dagster_async_executor
|