langchain-failover 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .eggs/
5
+ build/
6
+ dist/
7
+ .pytest_cache/
8
+ .ruff_cache/
9
+ .venv/
10
+ venv/
11
+ .env
@@ -0,0 +1,11 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0
4
+
5
+ - Initial release.
6
+ - `FailoverChatModel`: primary/secondary failover with stateful recovery.
7
+ - Connection-aware failover that walks the exception cause/context chain.
8
+ - `bind_tools` preserved across failover (binds both legs).
9
+ - Mid-stream safety: only fails over before the first streamed token.
10
+ - `create_failover_llm` convenience constructor with `/models` auto-discovery.
11
+ - `extract_token_metrics` helper for OpenAI-compatible and Ollama metadata.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vinay Vobbilichetty
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.4
2
+ Name: langchain-failover
3
+ Version: 0.1.0
4
+ Summary: Primary/secondary failover wrapper for LangChain chat models, with tool-calling preserved across failover.
5
+ Project-URL: Homepage, https://github.com/vinayvobbili/langchain-failover
6
+ Project-URL: Repository, https://github.com/vinayvobbili/langchain-failover
7
+ Project-URL: Issues, https://github.com/vinayvobbili/langchain-failover/issues
8
+ Author-email: Vinay Vobbilichetty <vinayvobbilichetty11@gmail.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Vinay Vobbilichetty
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: chat-model,failover,fallback,high-availability,langchain,llm,resilience
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.9
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
41
+ Requires-Python: >=3.9
42
+ Requires-Dist: langchain-core>=0.2
43
+ Provides-Extra: dev
44
+ Requires-Dist: build>=1.0; extra == 'dev'
45
+ Requires-Dist: langchain-openai>=0.1; extra == 'dev'
46
+ Requires-Dist: pytest>=7; extra == 'dev'
47
+ Requires-Dist: ruff>=0.4; extra == 'dev'
48
+ Requires-Dist: twine>=5.0; extra == 'dev'
49
+ Provides-Extra: openai
50
+ Requires-Dist: langchain-openai>=0.1; extra == 'openai'
51
+ Description-Content-Type: text/markdown
52
+
53
+ # langchain-failover
54
+
55
+ A tiny, dependency-light **primary/secondary failover wrapper** for LangChain chat
56
+ models. Point it at two chat models; it serves from the primary, transparently
57
+ falls back to the secondary on connection errors, and switches back the moment the
58
+ primary recovers — **and tool-calling keeps working across the failover.**
59
+
60
+ ```python
61
+ from langchain_openai import ChatOpenAI
62
+ from langchain_failover import FailoverChatModel
63
+
64
+ primary = ChatOpenAI(base_url="http://gpu-box:8001/v1", api_key="x", model="local")
65
+ backup = ChatOpenAI(base_url="http://cpu-box:8002/v1", api_key="x", model="local")
66
+
67
+ llm = FailoverChatModel(primary=primary, secondary=backup)
68
+
69
+ llm.invoke("Summarise this incident…") # served by primary
70
+ # …primary host dies…
71
+ llm.invoke("And the next one?") # transparently served by backup
72
+ # …primary comes back…
73
+ llm.invoke("One more") # back on primary, logged as recovered
74
+ ```
75
+
76
+ ## Install
77
+
78
+ ```bash
79
+ pip install langchain-failover # core
80
+ pip install "langchain-failover[openai]" # + langchain-openai for create_failover_llm
81
+ ```
82
+
83
+ ## Why not `RunnableWithFallbacks` / `.with_fallbacks()`?
84
+
85
+ LangChain ships per-invocation fallbacks, and they're great for what they do. This
86
+ package exists for the cases they don't cover well:
87
+
88
+ - **Stateful recovery.** `FailoverChatModel` remembers which leg it's on and logs
89
+ the transition both ways (the `active` property tells you). Both approaches try the
90
+ primary first on every call, but `.with_fallbacks()` is stateless and records none of it.
91
+ - **Tool-calling survives failover.** `bind_tools` is overridden to bind on *both*
92
+ legs and return another `FailoverChatModel`. With strict langchain-core
93
+ (`>=1.4`, where `BaseChatModel.bind_tools` raises by default) naïve wrappers
94
+ break at bind time; agents using this one keep working.
95
+ - **Connection-aware, not blanket.** It only fails over on connection/network
96
+ errors (walking the exception's `__cause__`/`__context__` chain, so a socket
97
+ error wrapped three layers deep still counts). A `ValueError` from a bad prompt
98
+ propagates instead of being silently retried on a second endpoint.
99
+ - **Mid-stream safety.** During `stream()`, it only fails over if the primary dies
100
+ *before* the first token — so you never get duplicated, half-streamed output.
101
+
102
+ ## Local-model convenience
103
+
104
+ If you run local OpenAI-compatible servers (vLLM, mlx-lm, Ollama, LM Studio) and
105
+ don't want to hardcode model names, `create_failover_llm` auto-discovers the served
106
+ model id from each endpoint's `/models`:
107
+
108
+ ```python
109
+ from langchain_failover import create_failover_llm
110
+
111
+ llm = create_failover_llm(
112
+ primary_url="http://localhost:8001/v1",
113
+ secondary_url="http://localhost:8002/v1",
114
+ )
115
+ ```
116
+
117
+ ## Bonus helper
118
+
119
+ `extract_token_metrics(response.response_metadata)` normalises token counts and
120
+ timings across OpenAI-compatible and Ollama metadata shapes into a single
121
+ `{input_tokens, output_tokens, prompt_time, generation_time}` dict.
122
+
123
+ ## License
124
+
125
+ MIT
@@ -0,0 +1,73 @@
1
+ # langchain-failover
2
+
3
+ A tiny, dependency-light **primary/secondary failover wrapper** for LangChain chat
4
+ models. Point it at two chat models; it serves from the primary, transparently
5
+ falls back to the secondary on connection errors, and switches back the moment the
6
+ primary recovers — **and tool-calling keeps working across the failover.**
7
+
8
+ ```python
9
+ from langchain_openai import ChatOpenAI
10
+ from langchain_failover import FailoverChatModel
11
+
12
+ primary = ChatOpenAI(base_url="http://gpu-box:8001/v1", api_key="x", model="local")
13
+ backup = ChatOpenAI(base_url="http://cpu-box:8002/v1", api_key="x", model="local")
14
+
15
+ llm = FailoverChatModel(primary=primary, secondary=backup)
16
+
17
+ llm.invoke("Summarise this incident…") # served by primary
18
+ # …primary host dies…
19
+ llm.invoke("And the next one?") # transparently served by backup
20
+ # …primary comes back…
21
+ llm.invoke("One more") # back on primary, logged as recovered
22
+ ```
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pip install langchain-failover # core
28
+ pip install "langchain-failover[openai]" # + langchain-openai for create_failover_llm
29
+ ```
30
+
31
+ ## Why not `RunnableWithFallbacks` / `.with_fallbacks()`?
32
+
33
+ LangChain ships per-invocation fallbacks, and they're great for what they do. This
34
+ package exists for the cases they don't cover well:
35
+
36
+ - **Stateful recovery.** `FailoverChatModel` remembers which leg it's on and logs
37
+ the transition both ways (the `active` property tells you). Both approaches try the
38
+ primary first on every call, but `.with_fallbacks()` is stateless and records none of it.
39
+ - **Tool-calling survives failover.** `bind_tools` is overridden to bind on *both*
40
+ legs and return another `FailoverChatModel`. With strict langchain-core
41
+ (`>=1.4`, where `BaseChatModel.bind_tools` raises by default) naïve wrappers
42
+ break at bind time; agents using this one keep working.
43
+ - **Connection-aware, not blanket.** It only fails over on connection/network
44
+ errors (walking the exception's `__cause__`/`__context__` chain, so a socket
45
+ error wrapped three layers deep still counts). A `ValueError` from a bad prompt
46
+ propagates instead of being silently retried on a second endpoint.
47
+ - **Mid-stream safety.** During `stream()`, it only fails over if the primary dies
48
+ *before* the first token — so you never get duplicated, half-streamed output.
49
+
50
+ ## Local-model convenience
51
+
52
+ If you run local OpenAI-compatible servers (vLLM, mlx-lm, Ollama, LM Studio) and
53
+ don't want to hardcode model names, `create_failover_llm` auto-discovers the served
54
+ model id from each endpoint's `/models`:
55
+
56
+ ```python
57
+ from langchain_failover import create_failover_llm
58
+
59
+ llm = create_failover_llm(
60
+ primary_url="http://localhost:8001/v1",
61
+ secondary_url="http://localhost:8002/v1",
62
+ )
63
+ ```
64
+
65
+ ## Bonus helper
66
+
67
+ `extract_token_metrics(response.response_metadata)` normalises token counts and
68
+ timings across OpenAI-compatible and Ollama metadata shapes into a single
69
+ `{input_tokens, output_tokens, prompt_time, generation_time}` dict.
70
+
71
+ ## License
72
+
73
+ MIT
@@ -0,0 +1,49 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "langchain-failover"
7
+ version = "0.1.0"
8
+ description = "Primary/secondary failover wrapper for LangChain chat models, with tool-calling preserved across failover."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { file = "LICENSE" }
12
+ authors = [{ name = "Vinay Vobbilichetty", email = "vinayvobbilichetty11@gmail.com" }]
13
+ keywords = ["langchain", "llm", "failover", "fallback", "resilience", "high-availability", "chat-model"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.9",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Software Development :: Libraries :: Python Modules",
24
+ ]
25
+ dependencies = [
26
+ "langchain-core>=0.2",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ openai = ["langchain-openai>=0.1"]
31
+ dev = [
32
+ "langchain-openai>=0.1",
33
+ "pytest>=7",
34
+ "ruff>=0.4",
35
+ "build>=1.0",
36
+ "twine>=5.0",
37
+ ]
38
+
39
+ [project.urls]
40
+ Homepage = "https://github.com/vinayvobbili/langchain-failover"
41
+ Repository = "https://github.com/vinayvobbili/langchain-failover"
42
+ Issues = "https://github.com/vinayvobbili/langchain-failover/issues"
43
+
44
+ [tool.hatch.build.targets.wheel]
45
+ packages = ["src/langchain_failover"]
46
+
47
+ [tool.ruff]
48
+ line-length = 100
49
+ target-version = "py39"
@@ -0,0 +1,17 @@
1
+ """langchain-failover — a primary/secondary failover wrapper for LangChain chat models."""
2
+ from langchain_failover.failover import (
3
+ FailoverChatModel,
4
+ create_failover_llm,
5
+ extract_token_metrics,
6
+ is_connection_error,
7
+ )
8
+
9
+ __version__ = "0.1.0"
10
+
11
+ __all__ = [
12
+ "FailoverChatModel",
13
+ "create_failover_llm",
14
+ "extract_token_metrics",
15
+ "is_connection_error",
16
+ "__version__",
17
+ ]
@@ -0,0 +1,248 @@
1
+ """A LangChain chat model that fails over between a primary and a secondary model.
2
+
3
+ The wrapper delegates every call to the primary chat model. If the primary raises
4
+ a connection-related error it transparently retries on the secondary, and it
5
+ switches back to the primary the moment the primary answers again. ``bind_tools``
6
+ is preserved across the failover so tool-calling agents keep working when either
7
+ leg is the one serving the request.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from typing import Any, Optional
13
+
14
+ from langchain_core.language_models import BaseChatModel
15
+ from pydantic import ConfigDict
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
# Exception *type names* (not classes) that we treat as "the endpoint is
# unreachable, try the other one." Matching by name keeps us independent of
# which HTTP/client library raised it (httpx, requests, urllib, openai, ...).
_CONNECTION_ERROR_NAMES = (
    "ConnectionError",
    "ConnectError",
    "ConnectTimeout",
    "RemoteProtocolError",
    "ConnectionRefusedError",
    "TimeoutError",
    "ReadTimeout",
    "APIConnectionError",
)


def is_connection_error(exc: BaseException) -> bool:
    """Return True if ``exc`` or anything it wraps looks like a network failure.

    The exception and every link of its ``__cause__``/``__context__`` chain is
    inspected, because client libraries routinely wrap the original socket
    error inside a higher-level exception. A link counts as a connection error
    when any of the following holds:

    * the name of its class, or of any base class, is listed in
      ``_CONNECTION_ERROR_NAMES``;
    * the name of its class, or of any base class, contains ``"connection"``
      (case-insensitive);
    * the first 200 characters of its lower-cased message contain
      ``"connection"``.

    Args:
        exc: The exception to classify.

    Returns:
        ``True`` when a link in the chain matches, ``False`` otherwise.
    """
    seen: set[int] = set()  # ids of visited links; guards against cyclic chains
    current: Optional[BaseException] = exc
    while current is not None and id(current) not in seen:
        seen.add(id(current))
        # Look at the whole class hierarchy, not only the concrete type, so
        # subclasses whose own name is not listed (e.g. ``BrokenPipeError``,
        # a ``ConnectionError``) are still recognised.
        for cls in type(current).__mro__:
            cls_name = cls.__name__
            if cls_name in _CONNECTION_ERROR_NAMES or "connection" in cls_name.lower():
                return True
        if "connection" in str(current).lower()[:200]:
            return True
        # Prefer the explicit cause; fall back to the implicit context. An
        # explicit ``None`` check avoids relying on the exception's truthiness.
        cause = current.__cause__
        current = cause if cause is not None else current.__context__
    return False
54
+
55
+
56
class FailoverChatModel(BaseChatModel):
    """Wraps two chat models — tries ``primary``, falls back to ``secondary``.

    All calls (``invoke``, ``stream``, ``generate``, tool-bound variants) are
    delegated to ``primary`` first. When ``primary`` raises an error that
    :func:`is_connection_error` recognises, the same call is retried on
    ``secondary`` and the wrapper remembers that it is running degraded; the
    next call that ``primary`` answers flips it back and logs the recovery.
    Any other error propagates unchanged.

    Example
    -------
    >>> from langchain_openai import ChatOpenAI
    >>> from langchain_failover import FailoverChatModel
    >>> primary = ChatOpenAI(base_url="http://localhost:8001/v1", api_key="x", model="local")
    >>> backup = ChatOpenAI(base_url="http://localhost:8002/v1", api_key="x", model="local")
    >>> llm = FailoverChatModel(primary=primary, secondary=backup)
    >>> llm.invoke("hello").content  # doctest: +SKIP
    """

    # ``Any`` rather than ``BaseChatModel`` on purpose: ``bind_tools`` on a leg
    # may return a ``Runnable`` binding rather than a ``BaseChatModel``, and a
    # strict field type would reject it.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # The two legs. ``primary`` is always tried first.
    primary: Any
    secondary: Any
    # Which leg served the most recent call: "primary" or "secondary".
    _active: str = "primary"

    @property
    def _llm_type(self) -> str:
        """Return the model-type label of this wrapper (``"failover"``)."""
        return "failover"

    def _mark_primary_recovered(self) -> None:
        """Record that ``primary`` answered; log once if we were degraded."""
        if self._active != "primary":
            logger.info("Failover: primary recovered, switching back")
            self._active = "primary"

    def _switch_to_secondary(self, exc: BaseException) -> None:
        """Record that ``primary`` failed with ``exc`` and ``secondary`` takes over.

        The warning is emitted only on the primary -> secondary transition,
        mirroring ``_mark_primary_recovered``. While the primary stays down,
        repeated failures are logged at debug level so a long outage does not
        flood the log with one warning per call.
        """
        if self._active == "secondary":
            logger.debug(
                "Failover: primary still down (%s), staying on secondary",
                type(exc).__name__,
            )
            return
        logger.warning(
            "Failover: primary down (%s), switching to secondary",
            type(exc).__name__,
        )
        self._active = "secondary"

    def _unwrap_leg(
        self, leg: Any, stop: Optional[list], kwargs: dict
    ) -> tuple:
        """Return ``(model, stop, call_kwargs)`` to use when calling ``leg``.

        If ``leg`` is a binding-like object (it has a ``bound`` model and a
        ``kwargs`` dict), the wrapped model is returned together with the bound
        kwargs merged under the per-call ``kwargs``, so that arguments bound by
        ``bind_tools`` (the tools themselves) actually reach
        ``_generate``/``_stream``. Any other leg is returned untouched.

        NOTE(review): this assumes langchain-core's ``RunnableBinding`` keeps
        the wrapped model on ``.bound`` and the bound arguments on ``.kwargs``,
        and that reaching ``_generate`` through the binding does not apply them
        — confirm against the installed langchain-core version.
        """
        bound_model = getattr(leg, "bound", None)
        bound_kwargs = getattr(leg, "kwargs", None)
        if bound_model is None or not isinstance(bound_kwargs, dict):
            return leg, stop, kwargs
        merged = {**bound_kwargs, **kwargs}
        # ``stop`` is passed explicitly below; pull a bound one out of the
        # kwargs so it is not supplied twice, and use it only as a default.
        bound_stop = merged.pop("stop", None)
        return bound_model, (stop if stop is not None else bound_stop), merged

    def _generate(
        self,
        messages: Any,
        stop: Optional[list] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> Any:
        """Generate on ``primary``; retry on ``secondary`` after a connection error.

        Args:
            messages: Messages forwarded unchanged to the serving leg.
            stop: Optional stop sequences forwarded to the serving leg.
            run_manager: Callback manager forwarded to the serving leg.
            **kwargs: Extra arguments forwarded to the serving leg.

        Returns:
            Whatever the serving leg's ``_generate`` returns.

        Raises:
            Exception: Any primary error that is not connection-related, and
                any error raised by ``secondary`` during the retry.
        """
        model, leg_stop, leg_kwargs = self._unwrap_leg(self.primary, stop, kwargs)
        try:
            result = model._generate(
                messages, stop=leg_stop, run_manager=run_manager, **leg_kwargs
            )
        except Exception as exc:
            if not is_connection_error(exc):
                raise
            self._switch_to_secondary(exc)
            model, leg_stop, leg_kwargs = self._unwrap_leg(self.secondary, stop, kwargs)
            return model._generate(
                messages, stop=leg_stop, run_manager=run_manager, **leg_kwargs
            )
        self._mark_primary_recovered()
        return result

    def _stream(
        self,
        messages: Any,
        stop: Optional[list] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ):
        """Stream from ``primary``; fall back only before the first chunk.

        Once a chunk has been yielded, a mid-stream error is re-raised rather
        than retried, because retrying would duplicate already-yielded output.

        Args:
            messages: Messages forwarded unchanged to the serving leg.
            stop: Optional stop sequences forwarded to the serving leg.
            run_manager: Callback manager forwarded to the serving leg.
            **kwargs: Extra arguments forwarded to the serving leg.

        Yields:
            The chunks produced by the serving leg's ``_stream``.
        """
        model, leg_stop, leg_kwargs = self._unwrap_leg(self.primary, stop, kwargs)
        started = False
        try:
            for chunk in model._stream(
                messages, stop=leg_stop, run_manager=run_manager, **leg_kwargs
            ):
                if not started:
                    started = True
                    self._mark_primary_recovered()
                yield chunk
        except Exception as exc:
            if started or not is_connection_error(exc):
                raise
            self._switch_to_secondary(exc)
            model, leg_stop, leg_kwargs = self._unwrap_leg(self.secondary, stop, kwargs)
            yield from model._stream(
                messages, stop=leg_stop, run_manager=run_manager, **leg_kwargs
            )

    def bind_tools(self, tools, **kwargs) -> "FailoverChatModel":
        """Bind ``tools`` on both legs and return a new wrapper around them.

        Binding each leg separately means whichever leg serves a request has
        the tools attached, so tool-calling keeps working across a failover.

        Args:
            tools: Tool definitions passed to each leg's ``bind_tools``.
            **kwargs: Extra arguments passed to each leg's ``bind_tools``.

        Returns:
            A new ``FailoverChatModel`` whose legs are the tool-bound legs.
        """
        return FailoverChatModel(
            primary=self.primary.bind_tools(tools, **kwargs),
            secondary=self.secondary.bind_tools(tools, **kwargs),
        )

    @property
    def active(self) -> str:
        """Which leg served the most recent call: ``"primary"`` or ``"secondary"``."""
        return self._active
155
+
156
+
157
def create_failover_llm(
    primary_url: str,
    secondary_url: str,
    temperature: float = 0.1,
    api_key: str = "not-needed",
    **kwargs: Any,
) -> FailoverChatModel:
    """Build a :class:`FailoverChatModel` from two OpenAI-compatible base URLs.

    The served model id is auto-discovered from each endpoint's ``/models`` list
    (handy for local servers like vLLM, mlx-lm, Ollama, or LM Studio, where you
    often don't want to hardcode the model name). Discovery is best-effort: if
    an endpoint cannot be queried or lists no models, the model id ``"default"``
    is used and the reason is logged. Extra ``kwargs`` are forwarded to both
    underlying ``ChatOpenAI`` instances.

    Args:
        primary_url: Primary endpoint base URL, e.g. ``http://localhost:8001/v1``.
        secondary_url: Fallback endpoint base URL.
        temperature: Sampling temperature for both legs.
        api_key: Bearer token sent to both endpoints (many local servers ignore it).
        **kwargs: Extra keyword arguments passed to both ``ChatOpenAI`` constructors.

    Returns:
        A ``FailoverChatModel`` wrapping one ``ChatOpenAI`` client per URL.

    Raises:
        ImportError: If ``langchain-openai`` is not installed.
    """
    try:
        from langchain_openai import ChatOpenAI
    except ImportError as exc:  # pragma: no cover
        raise ImportError(
            "create_failover_llm requires langchain-openai. "
            "Install it with `pip install langchain-failover[openai]`."
        ) from exc

    import json
    import urllib.request

    def _discover_model(base_url: str) -> str:
        """Return the first model id listed at ``base_url``/models, else "default"."""
        models_url = f"{base_url.rstrip('/')}/models"
        try:
            req = urllib.request.Request(
                models_url,
                headers={"Authorization": f"Bearer {api_key}"},
            )
            with urllib.request.urlopen(req, timeout=5) as resp:
                data = json.loads(resp.read()).get("data", [])
            if data:
                return data[0]["id"]
            logger.warning(
                "Failover LLM: %s listed no models, using model id 'default'",
                models_url,
            )
        except Exception as exc:
            # Deliberately best-effort: an endpoint that is down at construction
            # time must not prevent building the wrapper. Log the reason instead
            # of silently hiding it, so a wrong URL or key is diagnosable.
            logger.warning(
                "Failover LLM: model discovery failed for %s (%s: %s), "
                "using model id 'default'",
                models_url,
                type(exc).__name__,
                exc,
            )
        return "default"

    def _make_client(base_url: str) -> Any:
        """Create the ``ChatOpenAI`` client for one endpoint."""
        return ChatOpenAI(
            model=_discover_model(base_url),
            temperature=temperature,
            base_url=base_url,
            api_key=api_key,
            **kwargs,
        )

    logger.info("Failover LLM: primary=%s, secondary=%s", primary_url, secondary_url)
    return FailoverChatModel(
        primary=_make_client(primary_url),
        secondary=_make_client(secondary_url),
    )
216
+
217
+
218
def extract_token_metrics(meta: Optional[dict]) -> dict:
    """Pull token counts and timings out of a LangChain ``response_metadata`` dict.

    Handles both OpenAI-compatible servers (``token_usage``/``usage``; no timing)
    and Ollama (``prompt_eval_count``/``eval_count`` plus nanosecond durations).
    Every field defaults to 0 when absent *or* ``None`` so callers never have to
    guard.

    Args:
        meta: The ``response_metadata`` mapping, or ``None``.

    Returns:
        ``{"input_tokens", "output_tokens", "prompt_time", "generation_time"}``,
        with the two times expressed in seconds.
    """
    metrics = {
        "input_tokens": 0,
        "output_tokens": 0,
        "prompt_time": 0.0,
        "generation_time": 0.0,
    }
    if not meta:
        return metrics

    usage = meta.get("token_usage") or meta.get("usage") or {}
    if not isinstance(usage, dict):
        # A malformed usage entry should degrade to "no usage", not crash.
        usage = {}

    # ``or 0`` at the end turns an explicit ``None`` from either source into 0.
    metrics["input_tokens"] = (
        usage.get("prompt_tokens") or meta.get("prompt_eval_count") or 0
    )
    metrics["output_tokens"] = (
        usage.get("completion_tokens") or meta.get("eval_count") or 0
    )

    # Durations are divided by 1e9 (nanoseconds -> seconds); a missing or
    # ``None`` value yields 0.0.
    metrics["prompt_time"] = (meta.get("prompt_eval_duration") or 0) / 1e9
    metrics["generation_time"] = (meta.get("eval_duration") or 0) / 1e9
    return metrics
@@ -0,0 +1,107 @@
1
+ """Tests for FailoverChatModel — no network required.
2
+
3
+ Uses tiny fake chat models that either answer or raise, so the failover,
4
+ recovery, streaming, and bind_tools behaviour can be exercised deterministically.
5
+ """
6
+ from typing import Any, List, Optional
7
+
8
+ import pytest
9
+ from langchain_core.callbacks import CallbackManagerForLLMRun
10
+ from langchain_core.language_models import BaseChatModel
11
+ from langchain_core.messages import AIMessage
12
+ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
13
+ from langchain_core.messages import AIMessageChunk
14
+
15
+ from langchain_failover import FailoverChatModel, is_connection_error
16
+
17
+
18
class _FakeChat(BaseChatModel):
    """Answers with a fixed reply, or raises a chosen exception on every call."""

    # ``reply`` is the content of a successful answer; ``raises`` is an exception
    # instance to raise instead (``None`` means answer normally); ``calls``
    # counts how many times ``_generate``/``_stream`` were entered.
    reply: str = "ok"
    raises: Any = None
    calls: int = 0

    # Pydantic v2 style configuration, matching the ``model_config`` used by
    # ``FailoverChatModel`` (the nested ``class Config`` form is the v1 spelling).
    model_config = {"arbitrary_types_allowed": True}

    @property
    def _llm_type(self) -> str:
        """Return the model-type label of this fake (``"fake"``)."""
        return "fake"

    def _generate(
        self,
        messages,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Count the call, then raise ``raises`` or return ``reply`` as one generation."""
        object.__setattr__(self, "calls", self.calls + 1)
        if self.raises is not None:
            raise self.raises
        return ChatResult(generations=[ChatGeneration(message=AIMessage(content=self.reply))])

    def _stream(self, messages, stop=None, run_manager=None, **kwargs):
        """Count the call, then raise ``raises`` or yield ``reply`` as a single chunk."""
        object.__setattr__(self, "calls", self.calls + 1)
        if self.raises is not None:
            raise self.raises
        yield ChatGenerationChunk(message=AIMessageChunk(content=self.reply))

    def bind_tools(self, tools, **kwargs):
        """Return a new fake whose reply is prefixed with ``bound:``."""
        # Mirror the reply so a bound model is still identifiable in tests.
        return _FakeChat(reply=f"bound:{self.reply}", raises=self.raises)
53
+
54
+
55
def test_primary_serves_when_healthy():
    """A healthy primary answers the call and is reported as the active leg."""
    healthy = _FakeChat(reply="primary")
    standby = _FakeChat(reply="secondary")
    wrapper = FailoverChatModel(primary=healthy, secondary=standby)

    answer = wrapper.invoke("hi")

    assert answer.content == "primary"
    assert wrapper.active == "primary"
59
+
60
+
61
def test_fails_over_on_connection_error():
    """A connection error on the primary is answered by the secondary."""
    broken = _FakeChat(raises=ConnectionError("refused"))
    standby = _FakeChat(reply="secondary")
    wrapper = FailoverChatModel(primary=broken, secondary=standby)

    answer = wrapper.invoke("hi")

    assert answer.content == "secondary"
    assert wrapper.active == "secondary"
66
+
67
+
68
def test_non_connection_error_propagates():
    """An error that is not connection-related is raised, not retried."""
    faulty = _FakeChat(raises=ValueError("bad prompt"))
    standby = _FakeChat(reply="secondary")
    wrapper = FailoverChatModel(primary=faulty, secondary=standby)

    with pytest.raises(ValueError):
        wrapper.invoke("hi")
73
+
74
+
75
def test_recovers_back_to_primary():
    """After a failover, the wrapper returns to the primary once it answers again."""
    flaky = _FakeChat(raises=ConnectionError("down"))
    standby = _FakeChat(reply="secondary")
    wrapper = FailoverChatModel(primary=flaky, secondary=standby)

    # While the primary is down the secondary serves the call.
    degraded_answer = wrapper.invoke("hi")
    assert degraded_answer.content == "secondary"
    assert wrapper.active == "secondary"

    # Primary heals.
    for field, value in (("raises", None), ("reply", "primary-back")):
        object.__setattr__(flaky, field, value)

    recovered_answer = wrapper.invoke("hi")
    assert recovered_answer.content == "primary-back"
    assert wrapper.active == "primary"
86
+
87
+
88
def test_streaming_fails_over():
    """A primary that fails before its first chunk is replaced by the secondary stream."""
    broken = _FakeChat(raises=ConnectionError("refused"))
    standby = _FakeChat(reply="streamed")
    wrapper = FailoverChatModel(primary=broken, secondary=standby)

    pieces = [piece.content for piece in wrapper.stream("hi")]

    assert "".join(pieces) == "streamed"
93
+
94
+
95
def test_bind_tools_preserved_on_both_legs():
    """``bind_tools`` returns a failover wrapper whose primary leg is the bound model."""
    wrapper = FailoverChatModel(
        primary=_FakeChat(reply="p"),
        secondary=_FakeChat(reply="s"),
    )

    with_tools = wrapper.bind_tools([])

    assert isinstance(with_tools, FailoverChatModel)
    answer = with_tools.invoke("hi")
    assert answer.content == "bound:p"
100
+
101
+
102
def test_is_connection_error_walks_cause_chain():
    """A connection error hidden behind ``__cause__`` is found; unrelated errors are not."""
    wrapped = RuntimeError("wrapper")
    wrapped.__cause__ = ConnectionRefusedError("nope")
    unrelated = ValueError("totally unrelated")

    assert is_connection_error(wrapped)
    assert not is_connection_error(unrelated)