langgraph-node-deadline 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,176 @@
1
+ """langgraph-node-deadline — one binding deadline for every inner timeout.
2
+
3
+ The problem this solves
4
+ -----------------------
5
+ A LangGraph node that does real work usually has *several* layers each
6
+ re-deriving their own clock: an outer ``TimeoutPolicy`` watchdog, an inner
7
+ agent/tool budget, a retry loop, a sub-planner that "wants" 60 seconds. When
8
+ those clocks disagree, the inner layers dispatch work the outer watchdog is
9
+ guaranteed to kill — and the kill is uncooperative: it cancels the node and
10
+ **discards everything**, including any partial result you could have salvaged.
11
+ (See the long-standing upstream report: langchain-ai/langgraph#5672, "Run
12
+ Cancellation Causes Loss of Streamed State Not Yet Persisted as a Checkpoint".)
13
+
14
+ The fix
15
+ -------
16
+ Establish **one** binding deadline at node entry and make every inner timeout
17
+ *clamp to it* instead of re-deriving its own. Then your inner calls always
18
+ yield at the node boundary — with a few hundred ms of grace — *before* the
19
+ watchdog fires, so a ``try/except`` around the inner call actually runs and you
20
+ return a complete-but-shorter answer instead of nothing.
21
+
22
+ How it threads through async code
23
+ ---------------------------------
24
+ The deadline lives in a :class:`contextvars.ContextVar`. ``asyncio`` tasks copy
25
+ the ambient context at creation, so a scope opened before you ``await`` is
26
+ visible to the agent task *and every subagent task it spawns*. The default is
27
+ ``None`` (no scope) and every consumer **fails open** — code that runs outside a
28
+ scope (unit tests, direct invocations) keeps its existing arithmetic unchanged.
29
+
30
+ Quick start
31
+ -----------
32
+ >>> import asyncio
33
+ >>> from langgraph_node_deadline import node_deadline_in, cooperative_wait_for
34
+ >>> async def node():
35
+ ... # this node is allowed ~1.8s of cooperative runtime
36
+ ... with node_deadline_in(1.8):
37
+ ... try:
38
+ ... # the planner "wants" 5s but will be clamped to what's left
39
+ ... return await cooperative_wait_for(planner(), budget_secs=5.0)
40
+ ... except asyncio.TimeoutError:
41
+ ... return salvage_partial() # runs BEFORE the outer watchdog kills us
42
+
43
+ See ``examples/salvage_demo.py`` for a runnable, dependency-free contrast
44
+ between the naive path (work discarded) and the clamped path (work salvaged).
45
+ """
46
+
47
+ from __future__ import annotations
48
+
49
+ import asyncio
50
+ import time
51
+ from contextlib import contextmanager
52
+ from contextvars import ContextVar
53
+ from typing import Awaitable, Iterator, Optional, TypeVar
54
+
# Public names exported by ``from langgraph_node_deadline import *``.
__all__ = [
    "node_deadline",
    "node_deadline_scope",
    "node_deadline_in",
    "get_node_deadline_remaining_secs",
    "node_deadline_exceeded",
    "clamp_to_node_deadline",
    "cooperative_wait_for",
]

__version__ = "0.1.0"

# Result type of the awaitable passed to ``cooperative_wait_for``.
_T = TypeVar("_T")

# The binding deadline for the current context, stored as an absolute
# ``time.monotonic()`` timestamp. The default ``None`` means "no scope is
# active", which every consumer in this module treats as fail-open.
_node_deadline_monotonic: ContextVar[Optional[float]] = ContextVar(
    "node_deadline_monotonic", default=None
)
72
+
73
+
@contextmanager
def node_deadline_scope(deadline_monotonic: Optional[float]) -> Iterator[None]:
    """Run a ``with`` block under the given absolute node deadline.

    Args:
        deadline_monotonic: Absolute ``time.monotonic()`` timestamp by which
            inner work should have yielded. ``None`` runs the block with no
            deadline at all, so every consumer fails open; use it when the
            outer layer has no hard cap.

    The value is installed unconditionally: for the duration of the block it
    replaces whatever deadline an enclosing scope set, whether the new one is
    earlier or later. When the block exits, normally or through an exception,
    the previous value is put back, so a tighter inner scope hands control
    back to the outer deadline once its ``with`` block ends.
    """
    previous = _node_deadline_monotonic.set(deadline_monotonic)
    try:
        yield
    finally:
        # Reset through the token (rather than assigning ``None``) so that an
        # enclosing scope's deadline comes back into force.
        _node_deadline_monotonic.reset(previous)


# Primary public alias — reads naturally at call sites that already hold an
# absolute monotonic deadline: ``with node_deadline(executor_deadline): ...``
node_deadline = node_deadline_scope
96
+
97
+
@contextmanager
def node_deadline_in(seconds: float) -> Iterator[None]:
    """Run a ``with`` block under a deadline ``seconds`` from now.

    This is the relative-budget form of :func:`node_deadline_scope`:
    ``with node_deadline_in(30): ...`` behaves exactly like
    ``with node_deadline_scope(time.monotonic() + 30): ...``. Reach for it at
    node entry when the budget is naturally phrased as "this node gets N
    seconds" instead of as an absolute monotonic timestamp.

    Args:
        seconds: Budget, in seconds, measured from the moment the scope is
            entered.
    """
    # Convert the relative budget into an absolute timestamp once, at entry,
    # so time spent inside the block counts against it.
    deadline = time.monotonic() + seconds
    with node_deadline_scope(deadline):
        yield
109
+
110
+
def get_node_deadline_remaining_secs() -> Optional[float]:
    """Return the time left before the binding deadline, in seconds.

    Returns:
        ``None`` when no deadline scope is active; otherwise the distance from
        ``time.monotonic()`` to the deadline, reported as ``0.0`` once the
        deadline has passed (the result is never negative).
    """
    deadline = _node_deadline_monotonic.get()
    return None if deadline is None else max(0.0, deadline - time.monotonic())
120
+
121
+
def node_deadline_exceeded() -> bool:
    """Report whether an active deadline scope has run out of time.

    Returns:
        ``False`` when no scope is active (fail-open) or when time remains;
        ``True`` only when a scope is active and its deadline has passed.
        That makes it usable as a cooperative loop guard:
        ``while not node_deadline_exceeded(): ...``.
    """
    remaining = get_node_deadline_remaining_secs()
    if remaining is None:
        # No scope: nothing to exceed.
        return False
    return remaining <= 0.0
130
+
131
+
def clamp_to_node_deadline(budget_secs: float, *, reserve_secs: float = 0.0) -> float:
    """Clamp a proposed budget/timeout to the real deadline remaining.

    This is the core primitive: every inner layer that is about to start a
    timed operation passes its desired budget through here, so the timeout it
    uses can never exceed the binding node deadline.

    Args:
        budget_secs: The timeout the inner layer *wants*.
        reserve_secs: Headroom to carve below the deadline (e.g. a phase
            transition / finalize buffer) so the clamped work still has time
            to wrap up before the cooperative cancel. Values below zero (and
            NaN) are treated as ``0.0``: a reserve may only shorten the
            budget, never extend it past the deadline.

    Returns:
        ``budget_secs`` unchanged when no deadline scope is active (fail-open);
        otherwise ``min(budget_secs, remaining - reserve_secs)``, floored at
        ``0.0`` so it is always a valid ``asyncio.wait_for`` timeout.
    """
    remaining = get_node_deadline_remaining_secs()
    if remaining is None:
        return budget_secs
    # Without this floor a negative reserve would yield ``remaining + |r|``,
    # and a NaN reserve would make ``min`` fall back to the raw budget; either
    # way the result could outlast the deadline this function must enforce.
    headroom = max(0.0, reserve_secs)
    return max(0.0, min(budget_secs, remaining - headroom))
154
+
155
+
async def cooperative_wait_for(
    awaitable: Awaitable[_T],
    budget_secs: float,
    *,
    reserve_secs: float = 0.0,
) -> _T:
    """``asyncio.wait_for`` with its timeout clamped to the node deadline.

    Equivalent to ``asyncio.wait_for(awaitable, clamp_to_node_deadline(...))``.
    Because the clamped timeout fires *inside* the node, an
    ``asyncio.TimeoutError`` caught around this call lets the salvage path run
    before an outer watchdog cancels the node and discards the work.

    With no active deadline scope the timeout is simply ``budget_secs``, i.e.
    a plain ``wait_for(awaitable, budget_secs)`` — fail-open, no behavior
    change.

    Args:
        awaitable: The coroutine, task or future to wait on.
        budget_secs: The timeout the caller would like to allow.
        reserve_secs: Headroom kept free below the deadline; passed straight
            through to ``clamp_to_node_deadline``.

    Returns:
        Whatever ``awaitable`` produces.

    Raises:
        asyncio.TimeoutError: if the clamped budget elapses first.

    Note:
        Per the asyncio documentation, ``wait_for`` cancels the awaitable on
        timeout and waits for that cancellation to finish, so an awaitable
        that is slow to handle ``CancelledError`` can still delay the return.
        Leave enough grace under the watchdog to cover that.
    """
    return await asyncio.wait_for(
        awaitable,
        timeout=clamp_to_node_deadline(budget_secs, reserve_secs=reserve_secs),
    )
File without changes
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.4
2
+ Name: langgraph-node-deadline
3
+ Version: 0.1.0
4
+ Summary: One binding deadline for every inner timeout in a LangGraph node — clamp inner budgets to the cooperative deadline so work salvages instead of getting killed by the watchdog.
5
+ Project-URL: Homepage, https://github.com/youknowfred/langgraph-node-deadline
6
+ Project-URL: Issues, https://github.com/youknowfred/langgraph-node-deadline/issues
7
+ Author: Fred Becker
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: agents,asyncio,cancellation,deadline,langchain,langgraph,llm,reliability,timeout
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.9
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
24
+ Requires-Dist: pytest>=7; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # langgraph-node-deadline
28
+
29
+ **One binding deadline for every inner timeout in a LangGraph node.** Clamp inner
30
+ budgets to the node's cooperative deadline so heavy work **salvages a partial
31
+ result** instead of getting hard-killed by the watchdog and discarding everything.
32
+
33
+ Zero runtime dependencies. ~120 lines. Python 3.9+.
34
+
35
+ ```bash
36
+ pip install langgraph-node-deadline
37
+ ```
38
+
39
+ ---
40
+
41
+ ## The problem
42
+
43
+ A LangGraph node that does real work has *several layers each re-deriving their
44
+ own clock*: an outer `TimeoutPolicy` watchdog, an inner agent/tool budget, a
45
+ retry loop, a sub-planner that "wants" 60 seconds. When those clocks disagree,
46
+ the inner layers happily dispatch work the outer watchdog is **guaranteed to
47
+ kill** — and the kill is uncooperative. It cancels the node and **throws away
48
+ everything**, including the partial answer you could have returned.
49
+
50
+ You've seen the symptom: a long run times out into *nothing* after burning
51
+ minutes of paid LLM calls, and the user just sees "it failed." The upstream
52
+ issue is real and open: [langchain-ai/langgraph#5672 — *Run Cancellation Causes
53
+ Loss of Streamed State Not Yet Persisted*](https://github.com/langchain-ai/langgraph/issues/5672).
54
+
55
+ The trap, distilled: if your cooperative cancel and the watchdog are pinned to
56
+ the **same** number, the watchdog clock starts at *node entry — before your code
57
+ runs* — so your cancel loses the race deterministically. Equal timeouts lose.
58
+
59
+ ## The fix
60
+
61
+ Set **one** deadline at node entry. Make every inner timeout *clamp to it*
62
+ instead of re-deriving its own. Now inner calls yield at the node boundary, with
63
+ a little grace, **before** the watchdog fires — so your `try/except` actually
64
+ runs and you return a complete-but-shorter answer.
65
+
66
+ ```python
67
+ import asyncio
68
+ from langgraph_node_deadline import node_deadline_in, cooperative_wait_for
69
+
70
+ async def my_node(state):
71
+ # this node gets ~1.8s of cooperative runtime (a hair under its watchdog)
72
+ with node_deadline_in(1.8):
73
+ try:
74
+ # the planner asks for 5s, but gets clamped to what's actually left
75
+ result = await cooperative_wait_for(plan_and_write(state), budget_secs=5.0)
76
+ return {"draft": result}
77
+ except asyncio.TimeoutError:
78
+ # runs BEFORE the watchdog can kill us — keep the partial work
79
+ return {"draft": salvage_partial(state)}
80
+ ```
81
+
82
+ ## See it lose vs. salvage (30 seconds, no LangGraph needed)
83
+
84
+ ```bash
85
+ python examples/salvage_demo.py
86
+ ```
87
+
88
+ ```
89
+ Outer watchdog (LangGraph TimeoutPolicy): 2.0s | inner planner wants ~5s
90
+
91
+ NAIVE (inner ignores the node deadline)
92
+ -> LOST in 2.00s — outer watchdog cancelled the node, salvage code never ran, ALL work discarded
93
+
94
+ CLAMPED (inner clamps to the node deadline)
95
+ -> SALVAGED in 1.80s — kept 3 steps: ['step 1', 'step 2', 'step 3']
96
+ ```
97
+
98
+ Same work, same watchdog. One import decides whether you keep anything.
99
+
100
+ ## Wiring it into a real LangGraph node
101
+
102
+ Set the scope to a hair under whatever cap the executor enforces, then clamp
103
+ every inner timed call through it:
104
+
105
+ ```python
106
+ from langgraph_node_deadline import node_deadline_in, clamp_to_node_deadline, cooperative_wait_for
107
+
108
+ NODE_CAP_SECS = 30.0 # the hard cap your TimeoutPolicy enforces; the grace is subtracted below
109
+
110
+ async def research_node(state):
111
+ with node_deadline_in(NODE_CAP_SECS - 1.0): # leave 1s of grace under the watchdog
112
+ # an inner retry loop, sub-agent, or tool call — all clamp to the same deadline
113
+ per_call = clamp_to_node_deadline(15.0, reserve_secs=2.0) # reserve finalize headroom
114
+ chunks = await cooperative_wait_for(retrieve(state), budget_secs=per_call)
115
+ return {"chunks": chunks}
116
+ ```
117
+
118
+ Because the deadline lives in a `contextvars.ContextVar`, and `asyncio` copies
119
+ the ambient context when it creates a task, the scope you open before you
120
+ `await` is visible to the agent task **and every subagent task it spawns** — no
121
+ threading the deadline through call signatures.
122
+
123
+ ## API
124
+
125
+ | Symbol | What it does |
126
+ | --- | --- |
127
+ | `node_deadline_in(seconds)` | Context manager. Set the binding deadline to `now + seconds`. Use at node entry. |
128
+ | `node_deadline_scope(deadline_monotonic)` | Context manager. Set the deadline to an absolute `time.monotonic()` timestamp (or `None` to clear). `node_deadline` is an alias. |
129
+ | `clamp_to_node_deadline(budget_secs, *, reserve_secs=0.0)` | **The core primitive.** Returns `min(budget_secs, remaining - reserve_secs)`, floored at 0. Returns `budget_secs` unchanged when no scope is active. |
130
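+ | `cooperative_wait_for(awaitable, budget_secs, *, reserve_secs=0.0)` | `asyncio.wait_for` whose timeout is clamped to the node deadline. Raises `asyncio.TimeoutError` when the clamped budget elapses; as with `asyncio.wait_for`, the call returns only after the inner awaitable has finished handling its cancellation. |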
131
+ | `get_node_deadline_remaining_secs()` | Seconds left, or `None` if no scope. Never negative. |
132
+ | `node_deadline_exceeded()` | `True` only when a scope is active *and* its deadline has passed. Safe loop guard. |
133
+
134
+ **Fail-open by design.** With no active scope, every function behaves as if it
135
+ weren't there — so adding it to one node never changes the behavior of the rest
136
+ of your graph, your tests, or direct invocations.
137
+
138
+ ## Why a whole package for ~120 lines
139
+
140
+ Because the *lesson* is the hard part, not the code. This is the
141
+ [`derive-don't-pin`](https://github.com/langchain-ai/langgraph/issues/5672)
142
+ discipline extracted from a production agent that paid for it: a synthesis pool
143
+ that believed it had 43.5 seconds left *nine seconds before* the watchdog killed
144
+ the node — because four inner layers each trusted their own clock and none knew
145
+ the one the executor was actually enforcing. One binding deadline fixes the
146
+ entire class of bug.
147
+
148
+ ## License
149
+
150
+ MIT © 2026 Fred Becker. See [LICENSE](LICENSE).
@@ -0,0 +1,6 @@
1
+ langgraph_node_deadline/__init__.py,sha256=KlXL27hH4VoJsU8hnq4TBti981gRZzb8aiPDQjUi5js,6837
2
+ langgraph_node_deadline/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ langgraph_node_deadline-0.1.0.dist-info/METADATA,sha256=0sk0CUg72YpzqEkUWwiZJ2Twj1b4PY-q3HziL9Wulfg,6914
4
+ langgraph_node_deadline-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
5
+ langgraph_node_deadline-0.1.0.dist-info/licenses/LICENSE,sha256=HPhk1qB5a83Tl7J8SisvMzDk0XCLEn_BzWTNSCeMNdo,1068
6
+ langgraph_node_deadline-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Fred Becker
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.