langgraph-node-deadline 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langgraph_node_deadline/__init__.py +176 -0
- langgraph_node_deadline/py.typed +0 -0
- langgraph_node_deadline-0.1.0.dist-info/METADATA +150 -0
- langgraph_node_deadline-0.1.0.dist-info/RECORD +6 -0
- langgraph_node_deadline-0.1.0.dist-info/WHEEL +4 -0
- langgraph_node_deadline-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""langgraph-node-deadline — one binding deadline for every inner timeout.
|
|
2
|
+
|
|
3
|
+
The problem this solves
|
|
4
|
+
-----------------------
|
|
5
|
+
A LangGraph node that does real work usually has *several* layers each
|
|
6
|
+
re-deriving their own clock: an outer ``TimeoutPolicy`` watchdog, an inner
|
|
7
|
+
agent/tool budget, a retry loop, a sub-planner that "wants" 60 seconds. When
|
|
8
|
+
those clocks disagree, the inner layers dispatch work the outer watchdog is
|
|
9
|
+
guaranteed to kill — and the kill is uncooperative: it cancels the node and
|
|
10
|
+
**discards everything**, including any partial result you could have salvaged.
|
|
11
|
+
(See the long-standing upstream report: langchain-ai/langgraph#5672, "Run
|
|
12
|
+
Cancellation Causes Loss of Streamed State Not Yet Persisted as a Checkpoint".)
|
|
13
|
+
|
|
14
|
+
The fix
|
|
15
|
+
-------
|
|
16
|
+
Establish **one** binding deadline at node entry and make every inner timeout
|
|
17
|
+
*clamp to it* instead of re-deriving its own. Then your inner calls always
|
|
18
|
+
yield at the node boundary — with a few hundred ms of grace — *before* the
|
|
19
|
+
watchdog fires, so a ``try/except`` around the inner call actually runs and you
|
|
20
|
+
return a complete-but-shorter answer instead of nothing.
|
|
21
|
+
|
|
22
|
+
How it threads through async code
|
|
23
|
+
---------------------------------
|
|
24
|
+
The deadline lives in a :class:`contextvars.ContextVar`. ``asyncio`` tasks copy
|
|
25
|
+
the ambient context at creation, so a scope opened before you ``await`` is
|
|
26
|
+
visible to the agent task *and every subagent task it spawns*. The default is
|
|
27
|
+
``None`` (no scope) and every consumer **fails open** — code that runs outside a
|
|
28
|
+
scope (unit tests, direct invocations) keeps its existing arithmetic unchanged.
|
|
29
|
+
|
|
30
|
+
Quick start
|
|
31
|
+
-----------
|
|
32
|
+
>>> import asyncio
|
|
33
|
+
>>> from langgraph_node_deadline import node_deadline_in, cooperative_wait_for
|
|
34
|
+
>>> async def node():
|
|
35
|
+
... # this node is allowed ~1.8s of cooperative runtime
|
|
36
|
+
... with node_deadline_in(1.8):
|
|
37
|
+
... try:
|
|
38
|
+
... # the planner "wants" 5s but will be clamped to what's left
|
|
39
|
+
... return await cooperative_wait_for(planner(), budget_secs=5.0)
|
|
40
|
+
... except asyncio.TimeoutError:
|
|
41
|
+
... return salvage_partial() # runs BEFORE the outer watchdog kills us
|
|
42
|
+
|
|
43
|
+
See ``examples/salvage_demo.py`` for a runnable, dependency-free contrast
|
|
44
|
+
between the naive path (work discarded) and the clamped path (work salvaged).
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
from __future__ import annotations
|
|
48
|
+
|
|
49
|
+
import asyncio
|
|
50
|
+
import time
|
|
51
|
+
from contextlib import contextmanager
|
|
52
|
+
from contextvars import ContextVar
|
|
53
|
+
from typing import Awaitable, Iterator, Optional, TypeVar
|
|
54
|
+
|
|
55
|
+
# Names exported by ``from langgraph_node_deadline import *``.
__all__ = [
    "node_deadline",
    "node_deadline_scope",
    "node_deadline_in",
    "get_node_deadline_remaining_secs",
    "node_deadline_exceeded",
    "clamp_to_node_deadline",
    "cooperative_wait_for",
]

__version__ = "0.1.0"

# Result type of the awaitable handed to ``cooperative_wait_for``; it is
# returned to the caller unchanged.
_T = TypeVar("_T")

# The binding deadline as an absolute ``time.monotonic()`` timestamp.
# ``None`` (the default) means no scope is active, which every reader in this
# module treats as "do not clamp".
_node_deadline_monotonic: ContextVar[Optional[float]] = ContextVar(
    "node_deadline_monotonic",
    default=None,
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@contextmanager
def node_deadline_scope(deadline_monotonic: Optional[float]) -> Iterator[None]:
    """Bind the cooperative deadline for the duration of a ``with`` block.

    Args:
        deadline_monotonic: Absolute timestamp, on the ``time.monotonic()``
            clock, by which inner work is expected to have yielded. ``None``
            explicitly clears any enclosing deadline (fail-open), for outer
            layers that enforce no hard cap.

    Yields:
        ``None``. The deadline is published through a context variable rather
        than handed to the body.

    The previous value is put back when the block exits, whether normally or
    through an exception, so scopes nest: once an inner block ends, the
    deadline of the enclosing block applies again.
    """
    # ``set`` returns a token that remembers the value being replaced;
    # ``reset`` uses it to restore exactly that value on the way out.
    previous = _node_deadline_monotonic.set(deadline_monotonic)
    try:
        yield
    finally:
        _node_deadline_monotonic.reset(previous)


# Short public spelling of ``node_deadline_scope`` for call sites that already
# hold an absolute monotonic timestamp, e.g.
# ``with node_deadline(executor_deadline): ...``
node_deadline = node_deadline_scope
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@contextmanager
def node_deadline_in(seconds: float) -> Iterator[None]:
    """Open a deadline scope that expires ``seconds`` from now.

    Relative-budget counterpart of ``node_deadline_scope``:
    ``with node_deadline_in(30): ...`` behaves like
    ``with node_deadline_scope(time.monotonic() + 30): ...``. Reach for it at
    node entry when the budget is naturally phrased as "this node gets N
    seconds" rather than as an absolute monotonic timestamp.

    Args:
        seconds: Budget, in seconds, counted from the moment the block is
            entered.

    Yields:
        ``None``; the deadline is read back through the module's helpers.
    """
    # Resolve the relative budget to an absolute timestamp once, at entry, so
    # the deadline does not drift while the body runs.
    absolute_deadline = time.monotonic() + seconds
    with node_deadline_scope(absolute_deadline):
        yield
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def get_node_deadline_remaining_secs() -> Optional[float]:
    """Report how many seconds are left before the binding deadline.

    Returns:
        ``None`` when no deadline scope is active; otherwise the time left in
        seconds, floored at ``0.0`` so a deadline that has already passed is
        never reported as a negative number.
    """
    bound_deadline = _node_deadline_monotonic.get()
    if bound_deadline is None:
        return None
    seconds_left = bound_deadline - time.monotonic()
    # Floor at zero: callers use this value directly as a timeout.
    return seconds_left if seconds_left > 0.0 else 0.0
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def node_deadline_exceeded() -> bool:
    """Tell whether an active deadline scope has run out of time.

    Returns:
        ``False`` when no scope is active (fail-open); otherwise ``True`` once
        no time remains. This makes it usable as a cooperative loop guard:
        ``while not node_deadline_exceeded(): ...``.
    """
    seconds_left = get_node_deadline_remaining_secs()
    if seconds_left is None:
        # No scope: there is no deadline to exceed.
        return False
    return seconds_left <= 0.0
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def clamp_to_node_deadline(budget_secs: float, *, reserve_secs: float = 0.0) -> float:
    """Clamp a proposed budget/timeout to the real deadline remaining.

    This is the core primitive: every inner layer that is about to start a
    timed operation passes its desired budget through here, so it can never
    exceed the binding node deadline.

    Args:
        budget_secs: The timeout the inner layer *wants*.
        reserve_secs: Headroom to carve below the deadline (e.g. a phase
            transition / finalize buffer) so the clamped work still has time to
            wrap up before the cooperative cancel. Negative or NaN values are
            treated as ``0.0``; a reserve can only tighten the clamp, never
            loosen it.

    Returns:
        ``budget_secs`` unchanged when no deadline scope is active (fail-open);
        otherwise ``min(budget_secs, remaining - reserve_secs)``, floored at
        ``0.0`` so it is always a valid ``asyncio.wait_for`` timeout.
    """
    remaining = get_node_deadline_remaining_secs()
    if remaining is None:
        return budget_secs
    # Without this floor a negative reserve would raise the ceiling to
    # ``remaining + |reserve|`` and a NaN reserve would disable the clamp
    # altogether, letting the result outlast the deadline it must respect.
    headroom = max(0.0, reserve_secs)
    ceiling = remaining - headroom
    return max(0.0, min(budget_secs, ceiling))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
async def cooperative_wait_for(
    awaitable: Awaitable[_T],
    budget_secs: float,
    *,
    reserve_secs: float = 0.0,
) -> _T:
    """Await ``awaitable`` with a timeout capped by the binding node deadline.

    Shorthand for ``asyncio.wait_for(awaitable, clamp_to_node_deadline(...))``.
    The clamped timeout expires *inside* the node, so an
    ``asyncio.TimeoutError`` caught around this call lets the salvage path run
    **before** an outer watchdog cancels the node and discards the work.

    When no deadline scope is active the budget passes through untouched and
    this is a plain ``wait_for(awaitable, budget_secs)`` (fail-open).

    Args:
        awaitable: The coroutine, task or future to wait on.
        budget_secs: The timeout the caller would like to allow.
        reserve_secs: Headroom to keep free below the deadline; forwarded to
            ``clamp_to_node_deadline``.

    Returns:
        Whatever ``awaitable`` produces.

    Raises:
        asyncio.TimeoutError: if the clamped budget elapses first.
    """
    effective_timeout = clamp_to_node_deadline(
        budget_secs, reserve_secs=reserve_secs
    )
    outcome = await asyncio.wait_for(awaitable, timeout=effective_timeout)
    return outcome
|
|
File without changes
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langgraph-node-deadline
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: One binding deadline for every inner timeout in a LangGraph node — clamp inner budgets to the cooperative deadline so work salvages instead of getting killed by the watchdog.
|
|
5
|
+
Project-URL: Homepage, https://github.com/youknowfred/langgraph-node-deadline
|
|
6
|
+
Project-URL: Issues, https://github.com/youknowfred/langgraph-node-deadline/issues
|
|
7
|
+
Author: Fred Becker
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agents,asyncio,cancellation,deadline,langchain,langgraph,llm,reliability,timeout
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# langgraph-node-deadline
|
|
28
|
+
|
|
29
|
+
**One binding deadline for every inner timeout in a LangGraph node.** Clamp inner
|
|
30
|
+
budgets to the node's cooperative deadline so heavy work **salvages a partial
|
|
31
|
+
result** instead of getting hard-killed by the watchdog and discarding everything.
|
|
32
|
+
|
|
33
|
+
Zero runtime dependencies. ~120 lines. Python 3.9+.
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install langgraph-node-deadline
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## The problem
|
|
42
|
+
|
|
43
|
+
A LangGraph node that does real work has *several layers each re-deriving their
|
|
44
|
+
own clock*: an outer `TimeoutPolicy` watchdog, an inner agent/tool budget, a
|
|
45
|
+
retry loop, a sub-planner that "wants" 60 seconds. When those clocks disagree,
|
|
46
|
+
the inner layers happily dispatch work the outer watchdog is **guaranteed to
|
|
47
|
+
kill** — and the kill is uncooperative. It cancels the node and **throws away
|
|
48
|
+
everything**, including the partial answer you could have returned.
|
|
49
|
+
|
|
50
|
+
You've seen the symptom: a long run times out into *nothing* after burning
|
|
51
|
+
minutes of paid LLM calls, and the user just sees "it failed." The upstream
|
|
52
|
+
issue is real and open: [langchain-ai/langgraph#5672 — *Run Cancellation Causes
|
|
53
|
+
Loss of Streamed State Not Yet Persisted*](https://github.com/langchain-ai/langgraph/issues/5672).
|
|
54
|
+
|
|
55
|
+
The trap, distilled: if your cooperative cancel and the watchdog are pinned to
|
|
56
|
+
the **same** number, the watchdog clock starts at *node entry — before your code
|
|
57
|
+
runs* — so your cancel loses the race deterministically. Equal timeouts lose.
|
|
58
|
+
|
|
59
|
+
## The fix
|
|
60
|
+
|
|
61
|
+
Set **one** deadline at node entry. Make every inner timeout *clamp to it*
|
|
62
|
+
instead of re-deriving its own. Now inner calls yield at the node boundary, with
|
|
63
|
+
a little grace, **before** the watchdog fires — so your `try/except` actually
|
|
64
|
+
runs and you return a complete-but-shorter answer.
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
import asyncio
|
|
68
|
+
from langgraph_node_deadline import node_deadline_in, cooperative_wait_for
|
|
69
|
+
|
|
70
|
+
async def my_node(state):
|
|
71
|
+
# this node gets ~1.8s of cooperative runtime (a hair under its watchdog)
|
|
72
|
+
with node_deadline_in(1.8):
|
|
73
|
+
try:
|
|
74
|
+
# the planner asks for 5s, but gets clamped to what's actually left
|
|
75
|
+
result = await cooperative_wait_for(plan_and_write(state), budget_secs=5.0)
|
|
76
|
+
return {"draft": result}
|
|
77
|
+
except asyncio.TimeoutError:
|
|
78
|
+
# runs BEFORE the watchdog can kill us — keep the partial work
|
|
79
|
+
return {"draft": salvage_partial(state)}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## See it lose vs. salvage (30 seconds, no LangGraph needed)
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
python examples/salvage_demo.py
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
Outer watchdog (LangGraph TimeoutPolicy): 2.0s | inner planner wants ~5s
|
|
90
|
+
|
|
91
|
+
NAIVE (inner ignores the node deadline)
|
|
92
|
+
-> LOST in 2.00s — outer watchdog cancelled the node, salvage code never ran, ALL work discarded
|
|
93
|
+
|
|
94
|
+
CLAMPED (inner clamps to the node deadline)
|
|
95
|
+
-> SALVAGED in 1.80s — kept 3 steps: ['step 1', 'step 2', 'step 3']
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Same work, same watchdog. One import decides whether you keep anything.
|
|
99
|
+
|
|
100
|
+
## Wiring it into a real LangGraph node
|
|
101
|
+
|
|
102
|
+
Set the scope to a hair under whatever cap the executor enforces, then clamp
|
|
103
|
+
every inner timed call through it:
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from langgraph_node_deadline import node_deadline_in, clamp_to_node_deadline, cooperative_wait_for
|
|
107
|
+
|
|
108
|
+
NODE_CAP_SECS = 30.0  # your TimeoutPolicy cap as-is; the grace is subtracted below
|
|
109
|
+
|
|
110
|
+
async def research_node(state):
|
|
111
|
+
with node_deadline_in(NODE_CAP_SECS - 1.0): # leave 1s of grace under the watchdog
|
|
112
|
+
# an inner retry loop, sub-agent, or tool call — all clamp to the same deadline
|
|
113
|
+
per_call = clamp_to_node_deadline(15.0, reserve_secs=2.0) # reserve finalize headroom
|
|
114
|
+
chunks = await cooperative_wait_for(retrieve(state), budget_secs=per_call)
|
|
115
|
+
return {"chunks": chunks}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Because the deadline lives in a `contextvars.ContextVar`, and `asyncio` copies
|
|
119
|
+
the ambient context when it creates a task, the scope you open before you
|
|
120
|
+
`await` is visible to the agent task **and every subagent task it spawns** — no
|
|
121
|
+
threading the deadline through call signatures.
|
|
122
|
+
|
|
123
|
+
## API
|
|
124
|
+
|
|
125
|
+
| Symbol | What it does |
|
|
126
|
+
| --- | --- |
|
|
127
|
+
| `node_deadline_in(seconds)` | Context manager. Set the binding deadline to `now + seconds`. Use at node entry. |
|
|
128
|
+
| `node_deadline_scope(deadline_monotonic)` | Context manager. Set the deadline to an absolute `time.monotonic()` timestamp (or `None` to clear). `node_deadline` is an alias. |
|
|
129
|
+
| `clamp_to_node_deadline(budget_secs, *, reserve_secs=0.0)` | **The core primitive.** Returns `min(budget_secs, remaining - reserve_secs)`, floored at 0. Returns `budget_secs` unchanged when no scope is active. |
|
|
130
|
+
| `cooperative_wait_for(awaitable, budget_secs, *, reserve_secs=0.0)` | `asyncio.wait_for` that never outlasts the node deadline. Raises `asyncio.TimeoutError` on the clamped budget. |
|
|
131
|
+
| `get_node_deadline_remaining_secs()` | Seconds left, or `None` if no scope. Never negative. |
|
|
132
|
+
| `node_deadline_exceeded()` | `True` only when a scope is active *and* its deadline has passed. Safe loop guard. |
|
|
133
|
+
|
|
134
|
+
**Fail-open by design.** With no active scope, every function behaves as if it
|
|
135
|
+
weren't there — so adding it to one node never changes the behavior of the rest
|
|
136
|
+
of your graph, your tests, or direct invocations.
|
|
137
|
+
|
|
138
|
+
## Why a whole package for ~120 lines
|
|
139
|
+
|
|
140
|
+
Because the *lesson* is the hard part, not the code. This is the
|
|
141
|
+
[`derive-don't-pin`](https://github.com/langchain-ai/langgraph/issues/5672)
|
|
142
|
+
discipline extracted from a production agent that paid for it: a synthesis pool
|
|
143
|
+
that believed it had 43.5 seconds left *nine seconds before* the watchdog killed
|
|
144
|
+
the node — because four inner layers each trusted their own clock and none knew
|
|
145
|
+
the one the executor was actually enforcing. One binding deadline fixes the
|
|
146
|
+
entire class of bug.
|
|
147
|
+
|
|
148
|
+
## License
|
|
149
|
+
|
|
150
|
+
MIT © 2026 Fred Becker. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
langgraph_node_deadline/__init__.py,sha256=KlXL27hH4VoJsU8hnq4TBti981gRZzb8aiPDQjUi5js,6837
|
|
2
|
+
langgraph_node_deadline/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
langgraph_node_deadline-0.1.0.dist-info/METADATA,sha256=0sk0CUg72YpzqEkUWwiZJ2Twj1b4PY-q3HziL9Wulfg,6914
|
|
4
|
+
langgraph_node_deadline-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
5
|
+
langgraph_node_deadline-0.1.0.dist-info/licenses/LICENSE,sha256=HPhk1qB5a83Tl7J8SisvMzDk0XCLEn_BzWTNSCeMNdo,1068
|
|
6
|
+
langgraph_node_deadline-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Fred Becker
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|