langchain_dev_utils-1.2.8-py3-none-any.whl → langchain_dev_utils-1.2.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,577 +1,584 @@
1
- from __future__ import annotations
2
-
3
- from collections.abc import AsyncIterator, Iterator
4
- from json import JSONDecodeError
5
- from typing import (
6
- Any,
7
- Callable,
8
- List,
9
- Literal,
10
- Optional,
11
- Sequence,
12
- Type,
13
- TypeVar,
14
- Union,
15
- )
16
-
17
- import openai
18
- from langchain_core.callbacks import (
19
- AsyncCallbackManagerForLLMRun,
20
- CallbackManagerForLLMRun,
21
- )
22
- from langchain_core.language_models import LangSmithParams, LanguageModelInput
23
- from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
24
- from langchain_core.outputs import ChatGenerationChunk, ChatResult
25
- from langchain_core.runnables import Runnable
26
- from langchain_core.tools import BaseTool
27
- from langchain_core.utils import from_env, secret_from_env
28
- from langchain_core.utils.function_calling import convert_to_openai_tool
29
- from langchain_openai.chat_models._compat import _convert_from_v1_to_chat_completions
30
- from langchain_openai.chat_models.base import BaseChatOpenAI, _convert_message_to_dict
31
- from pydantic import (
32
- BaseModel,
33
- ConfigDict,
34
- Field,
35
- PrivateAttr,
36
- SecretStr,
37
- create_model,
38
- model_validator,
39
- )
40
- from typing_extensions import Self
41
-
42
- from ..types import (
43
- CompatibilityOptions,
44
- ReasoningContentKeepType,
45
- ResponseFormatType,
46
- ToolChoiceType,
47
- )
48
-
49
- _BM = TypeVar("_BM", bound=BaseModel)
50
- _DictOrPydanticClass = Union[dict[str, Any], type[_BM], type]
51
- _DictOrPydantic = Union[dict, _BM]
52
-
53
-
54
- def _get_last_human_message_index(messages: list[BaseMessage]) -> int:
55
- """find the index of the last HumanMessage in the messages list, return -1 if not found."""
56
- return next(
57
- (
58
- i
59
- for i in range(len(messages) - 1, -1, -1)
60
- if isinstance(messages[i], HumanMessage)
61
- ),
62
- -1,
63
- )
64
-
65
-
66
- class _BaseChatOpenAICompatible(BaseChatOpenAI):
67
- """
68
- Base template class for OpenAI-compatible chat model implementations.
69
-
70
- This class provides a foundation for integrating various LLM providers that
71
- offer OpenAI-compatible APIs (such as vLLM, OpenRouter, ZAI, Moonshot,
72
- and many others). It enhances the base OpenAI functionality by:
73
-
74
- **1. Supports output of more types of reasoning content (reasoning_content)**
75
- ChatOpenAI can only output reasoning content natively supported by official
76
- OpenAI models, while OpenAICompatibleChatModel can output reasoning content
77
- from other model providers (e.g., OpenRouter, vLLM).
78
-
79
- **2. Dynamically adapts to choose the most suitable structured-output method**
80
- OpenAICompatibleChatModel adds method="auto" (default), which selects the best
81
- structured-output method (function_calling or json_schema) based on the actual
82
- capabilities of the model provider.
83
-
84
- **3. Supports configuration of provider compatibility options**
85
- For cases where parameters differ from the official OpenAI API, this library
86
- provides the compatibility_options parameter to address this issue. For
87
- example, when different model providers have inconsistent support for
88
- tool_choice, you can adapt by setting supported_tool_choice in
89
- compatibility_options.
90
-
91
- Built on top of `langchain-openai`'s `BaseChatOpenAI`, this template class
92
- extends capabilities to better support diverse OpenAI-compatible model
93
- providers while maintaining full compatibility with LangChain's chat model
94
- interface.
95
-
96
- Note: This is a template class and should not be exported or instantiated
97
- directly. Instead, use it as a base class and provide the specific provider
98
- name through inheritance or the factory function
99
- `_create_openai_compatible_model()`.
100
- """
101
-
102
- model_name: str = Field(alias="model", default="openai compatible model")
103
- """The name of the model"""
104
- api_key: Optional[SecretStr] = Field(
105
- default_factory=secret_from_env("OPENAI_COMPATIBLE_API_KEY", default=None),
106
- )
107
- """OpenAI Compatible API key"""
108
- api_base: str = Field(
109
- default_factory=from_env("OPENAI_COMPATIBLE_API_BASE", default=""),
110
- )
111
- """OpenAI Compatible API base URL"""
112
-
113
- model_config = ConfigDict(populate_by_name=True)
114
-
115
- _provider: str = PrivateAttr(default="openai-compatible")
116
-
117
- """Provider Compatibility Options"""
118
- supported_tool_choice: ToolChoiceType = Field(default_factory=list)
119
- """Supported tool choice"""
120
- supported_response_format: ResponseFormatType = Field(default_factory=list)
121
- """Supported response format"""
122
- reasoning_content_keep_type: ReasoningContentKeepType = Field(default="discard")
123
- """How to keep reasoning content in the messages"""
124
- include_usage: bool = Field(default=True)
125
- """Whether to include usage information in the output"""
126
-
127
- @property
128
- def _llm_type(self) -> str:
129
- return f"chat-{self._provider}"
130
-
131
- @property
132
- def lc_secrets(self) -> dict[str, str]:
133
- return {"api_key": f"{self._provider.upper()}_API_KEY"}
134
-
135
- def _get_ls_params(
136
- self,
137
- stop: Optional[list[str]] = None,
138
- **kwargs: Any,
139
- ) -> LangSmithParams:
140
- ls_params = super()._get_ls_params(stop=stop, **kwargs)
141
- ls_params["ls_provider"] = self._provider
142
- return ls_params
143
-
144
- def _get_request_payload(
145
- self,
146
- input_: LanguageModelInput,
147
- *,
148
- stop: list[str] | None = None,
149
- **kwargs: Any,
150
- ) -> dict:
151
- payload = {**self._default_params, **kwargs}
152
-
153
- if self._use_responses_api(payload):
154
- return super()._get_request_payload(input_, stop=stop, **kwargs)
155
-
156
- messages = self._convert_input(input_).to_messages()
157
- if stop is not None:
158
- kwargs["stop"] = stop
159
-
160
- payload_messages = []
161
- last_human_index = -1
162
- if self.reasoning_content_keep_type == "temp":
163
- last_human_index = _get_last_human_message_index(messages)
164
-
165
- for index, m in enumerate(messages):
166
- if isinstance(m, AIMessage):
167
- msg_dict = _convert_message_to_dict(
168
- _convert_from_v1_to_chat_completions(m)
169
- )
170
- if (
171
- self.reasoning_content_keep_type == "retain"
172
- and m.additional_kwargs.get("reasoning_content")
173
- ):
174
- msg_dict["reasoning_content"] = m.additional_kwargs.get(
175
- "reasoning_content"
176
- )
177
- elif (
178
- self.reasoning_content_keep_type == "temp"
179
- and index > last_human_index
180
- and m.additional_kwargs.get("reasoning_content")
181
- ):
182
- msg_dict["reasoning_content"] = m.additional_kwargs.get(
183
- "reasoning_content"
184
- )
185
- payload_messages.append(msg_dict)
186
- else:
187
- payload_messages.append(_convert_message_to_dict(m))
188
-
189
- payload["messages"] = payload_messages
190
- return payload
191
-
192
- @model_validator(mode="after")
193
- def validate_environment(self) -> Self:
194
- if not (self.api_key and self.api_key.get_secret_value()):
195
- msg = f"{self._provider.upper()}_API_KEY must be set."
196
- raise ValueError(msg)
197
- client_params: dict = {
198
- k: v
199
- for k, v in {
200
- "api_key": self.api_key.get_secret_value() if self.api_key else None,
201
- "base_url": self.api_base,
202
- "timeout": self.request_timeout,
203
- "max_retries": self.max_retries,
204
- "default_headers": self.default_headers,
205
- "default_query": self.default_query,
206
- }.items()
207
- if v is not None
208
- }
209
-
210
- if not (self.client or None):
211
- sync_specific: dict = {"http_client": self.http_client}
212
- self.root_client = openai.OpenAI(**client_params, **sync_specific)
213
- self.client = self.root_client.chat.completions
214
- if not (self.async_client or None):
215
- async_specific: dict = {"http_client": self.http_async_client}
216
- self.root_async_client = openai.AsyncOpenAI(
217
- **client_params,
218
- **async_specific,
219
- )
220
- self.async_client = self.root_async_client.chat.completions
221
- return self
222
-
223
- @model_validator(mode="after")
224
- def _set_model_profile(self) -> Self:
225
- """Set model profile if not overridden."""
226
- if self.profile is None:
227
- self.profile = {}
228
- return self
229
-
230
- def _create_chat_result(
231
- self,
232
- response: Union[dict, openai.BaseModel],
233
- generation_info: Optional[dict] = None,
234
- ) -> ChatResult:
235
- """Convert API response to LangChain ChatResult with enhanced content processing.
236
-
237
- Extends base implementation to capture and preserve reasoning content from
238
- model responses, supporting advanced models that provide reasoning chains
239
- or thought processes alongside regular responses.
240
-
241
- Handles multiple response formats:
242
- - Standard OpenAI response objects with `reasoning_content` attribute
243
- - Responses with `model_extra` containing reasoning data
244
- - Dictionary responses (pass-through to base implementation)
245
-
246
- Args:
247
- response: Raw API response (OpenAI object or dict)
248
- generation_info: Additional generation metadata
249
-
250
- Returns:
251
- ChatResult with enhanced message containing reasoning content when available
252
- """
253
- rtn = super()._create_chat_result(response, generation_info)
254
-
255
- if not isinstance(response, openai.BaseModel):
256
- return rtn
257
-
258
- for generation in rtn.generations:
259
- if generation.message.response_metadata is None:
260
- generation.message.response_metadata = {}
261
- generation.message.response_metadata["model_provider"] = "openai-compatible"
262
-
263
- choices = getattr(response, "choices", None)
264
- if choices and hasattr(choices[0].message, "reasoning_content"):
265
- rtn.generations[0].message.additional_kwargs["reasoning_content"] = choices[
266
- 0
267
- ].message.reasoning_content
268
- elif choices and hasattr(choices[0].message, "model_extra"):
269
- model_extra = choices[0].message.model_extra
270
- if isinstance(model_extra, dict) and (
271
- reasoning := model_extra.get("reasoning")
272
- ):
273
- rtn.generations[0].message.additional_kwargs["reasoning_content"] = (
274
- reasoning
275
- )
276
-
277
- return rtn
278
-
279
- def _convert_chunk_to_generation_chunk(
280
- self,
281
- chunk: dict,
282
- default_chunk_class: type,
283
- base_generation_info: Optional[dict],
284
- ) -> Optional[ChatGenerationChunk]:
285
- """Convert streaming chunk to generation chunk with reasoning content support.
286
-
287
- Processes streaming response chunks to extract reasoning content alongside
288
- regular message content, enabling real-time streaming of both response
289
- text and reasoning chains from compatible models.
290
-
291
- Args:
292
- chunk: Raw streaming chunk from API
293
- default_chunk_class: Expected chunk type for validation
294
- base_generation_info: Base metadata for the generation
295
-
296
- Returns:
297
- ChatGenerationChunk with reasoning content when present in chunk data
298
- """
299
- generation_chunk = super()._convert_chunk_to_generation_chunk(
300
- chunk,
301
- default_chunk_class,
302
- base_generation_info,
303
- )
304
- if (choices := chunk.get("choices")) and generation_chunk:
305
- top = choices[0]
306
- if isinstance(generation_chunk.message, AIMessageChunk):
307
- generation_chunk.message.response_metadata = {
308
- **generation_chunk.message.response_metadata,
309
- "model_provider": "openai-compatible",
310
- }
311
- if (
312
- reasoning_content := top.get("delta", {}).get("reasoning_content")
313
- ) is not None:
314
- generation_chunk.message.additional_kwargs["reasoning_content"] = (
315
- reasoning_content
316
- )
317
- elif (reasoning := top.get("delta", {}).get("reasoning")) is not None:
318
- generation_chunk.message.additional_kwargs["reasoning_content"] = (
319
- reasoning
320
- )
321
-
322
- return generation_chunk
323
-
324
- def _stream(
325
- self,
326
- messages: List[BaseMessage],
327
- stop: Optional[List[str]] = None,
328
- run_manager: Optional[CallbackManagerForLLMRun] = None,
329
- **kwargs: Any,
330
- ) -> Iterator[ChatGenerationChunk]:
331
- if self.include_usage:
332
- kwargs["stream_options"] = {"include_usage": True}
333
- try:
334
- for chunk in super()._stream(
335
- messages, stop=stop, run_manager=run_manager, **kwargs
336
- ):
337
- yield chunk
338
- except JSONDecodeError as e:
339
- raise JSONDecodeError(
340
- f"{self._provider.title()} API returned an invalid response. "
341
- "Please check the API status and try again.",
342
- e.doc,
343
- e.pos,
344
- ) from e
345
-
346
- async def _astream(
347
- self,
348
- messages: List[BaseMessage],
349
- stop: Optional[List[str]] = None,
350
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
351
- **kwargs: Any,
352
- ) -> AsyncIterator[ChatGenerationChunk]:
353
- if self.include_usage:
354
- kwargs["stream_options"] = {"include_usage": True}
355
- try:
356
- async for chunk in super()._astream(
357
- messages, stop=stop, run_manager=run_manager, **kwargs
358
- ):
359
- yield chunk
360
- except JSONDecodeError as e:
361
- raise JSONDecodeError(
362
- f"{self._provider.title()} API returned an invalid response. "
363
- "Please check the API status and try again.",
364
- e.doc,
365
- e.pos,
366
- ) from e
367
-
368
- def _generate(
369
- self,
370
- messages: List[BaseMessage],
371
- stop: Optional[List[str]] = None,
372
- run_manager: Optional[CallbackManagerForLLMRun] = None,
373
- **kwargs: Any,
374
- ) -> ChatResult:
375
- try:
376
- return super()._generate(
377
- messages, stop=stop, run_manager=run_manager, **kwargs
378
- )
379
- except JSONDecodeError as e:
380
- raise JSONDecodeError(
381
- f"{self._provider.title()} API returned an invalid response. "
382
- "Please check the API status and try again.",
383
- e.doc,
384
- e.pos,
385
- ) from e
386
-
387
- async def _agenerate(
388
- self,
389
- messages: List[BaseMessage],
390
- stop: Optional[List[str]] = None,
391
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
392
- **kwargs: Any,
393
- ) -> ChatResult:
394
- try:
395
- return await super()._agenerate(
396
- messages, stop=stop, run_manager=run_manager, **kwargs
397
- )
398
- except JSONDecodeError as e:
399
- raise JSONDecodeError(
400
- f"{self._provider.title()} API returned an invalid response. "
401
- "Please check the API status and try again.",
402
- e.doc,
403
- e.pos,
404
- ) from e
405
-
406
- def bind_tools(
407
- self,
408
- tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
409
- *,
410
- tool_choice: dict | str | bool | None = None,
411
- strict: bool | None = None,
412
- parallel_tool_calls: bool | None = None,
413
- **kwargs: Any,
414
- ) -> Runnable[LanguageModelInput, AIMessage]:
415
- if parallel_tool_calls is not None:
416
- kwargs["parallel_tool_calls"] = parallel_tool_calls
417
- formatted_tools = [
418
- convert_to_openai_tool(tool, strict=strict) for tool in tools
419
- ]
420
-
421
- tool_names = []
422
- for tool in formatted_tools:
423
- if "function" in tool:
424
- tool_names.append(tool["function"]["name"])
425
- elif "name" in tool:
426
- tool_names.append(tool["name"])
427
- else:
428
- pass
429
-
430
- support_tool_choice = False
431
- if tool_choice is not None:
432
- if isinstance(tool_choice, bool):
433
- tool_choice = "required"
434
- if isinstance(tool_choice, str):
435
- if (
436
- tool_choice in ["auto", "none", "required"]
437
- and tool_choice in self.supported_tool_choice
438
- ):
439
- support_tool_choice = True
440
-
441
- elif "specific" in self.supported_tool_choice:
442
- if tool_choice in tool_names:
443
- support_tool_choice = True
444
- tool_choice = {
445
- "type": "function",
446
- "function": {"name": tool_choice},
447
- }
448
- tool_choice = tool_choice if support_tool_choice else None
449
- if tool_choice:
450
- kwargs["tool_choice"] = tool_choice
451
- return super().bind(tools=formatted_tools, **kwargs)
452
-
453
- def with_structured_output(
454
- self,
455
- schema: Optional[_DictOrPydanticClass] = None,
456
- *,
457
- method: Literal[
458
- "auto",
459
- "function_calling",
460
- "json_mode",
461
- "json_schema",
462
- ] = "auto",
463
- include_raw: bool = False,
464
- strict: Optional[bool] = None,
465
- **kwargs: Any,
466
- ) -> Runnable[LanguageModelInput, _DictOrPydantic]:
467
- """Configure structured output extraction with provider compatibility handling.
468
-
469
- Enables parsing of model outputs into structured formats (Pydantic models
470
- or dictionaries) while handling provider-specific method compatibility.
471
- Falls back from json_schema to function_calling for providers that don't
472
- support the json_schema method.
473
-
474
- Args:
475
- schema: Output schema (Pydantic model class or dictionary definition)
476
- method: Extraction method; defaults to "auto", which chooses the best method based on the provider's supported response formats
477
- include_raw: Whether to include raw model response alongside parsed output
478
- strict: Schema enforcement strictness (provider-dependent)
479
- **kwargs: Additional structured output parameters
480
-
481
- Returns:
482
- Runnable configured for structured output extraction
483
- """
484
- if method not in ["auto", "function_calling", "json_mode", "json_schema"]:
485
- raise ValueError(
486
- f"Unsupported method: {method}. Please choose from 'auto', 'function_calling', 'json_mode', 'json_schema'."
487
- )
488
- if method == "auto":
489
- if "json_schema" in self.supported_response_format:
490
- method = "json_schema"
491
- else:
492
- method = "function_calling"
493
- elif (
494
- method == "json_schema"
495
- and "json_schema" not in self.supported_response_format
496
- ):
497
- method = "function_calling"
498
- elif (
499
- method == "json_mode" and "json_mode" not in self.supported_response_format
500
- ):
501
- method = "function_calling"
502
-
503
- return super().with_structured_output(
504
- schema,
505
- method=method,
506
- include_raw=include_raw,
507
- strict=strict,
508
- **kwargs,
509
- )
510
-
511
-
512
- def _create_openai_compatible_model(
513
- provider: str,
514
- base_url: str,
515
- compatibility_options: Optional[CompatibilityOptions] = None,
516
- ) -> Type[_BaseChatOpenAICompatible]:
517
- """Factory function for creating provider-specific OpenAI-compatible model classes.
518
-
519
- Dynamically generates model classes for different OpenAI-compatible providers,
520
- configuring environment variable mappings and default base URLs specific to each provider.
521
-
522
- Args:
523
- provider: Provider identifier (e.g., `vllm`, `openrouter`)
524
- base_url: Default API base URL for the provider
525
- compatibility_options: Optional configuration for the provider
526
-
527
- Returns:
528
- Configured model class ready for instantiation with provider-specific settings
529
- """
530
- chat_model_cls_name = f"Chat{provider.title()}"
531
- if compatibility_options is None:
532
- compatibility_options = {}
533
-
534
- return create_model(
535
- chat_model_cls_name,
536
- __base__=_BaseChatOpenAICompatible,
537
- api_base=(
538
- str,
539
- Field(
540
- default_factory=from_env(
541
- f"{provider.upper()}_API_BASE", default=base_url
542
- ),
543
- ),
544
- ),
545
- api_key=(
546
- str,
547
- Field(
548
- default_factory=secret_from_env(
549
- f"{provider.upper()}_API_KEY", default=None
550
- ),
551
- ),
552
- ),
553
- _provider=(
554
- str,
555
- PrivateAttr(default=provider),
556
- ),
557
- supported_tool_choice=(
558
- ToolChoiceType,
559
- Field(default=compatibility_options.get("supported_tool_choice", ["auto"])),
560
- ),
561
- reasoning_content_keep_type=(
562
- ReasoningContentKeepType,
563
- Field(
564
- default=compatibility_options.get(
565
- "reasoning_content_keep_type", "discard"
566
- )
567
- ),
568
- ),
569
- supported_response_format=(
570
- ResponseFormatType,
571
- Field(default=compatibility_options.get("supported_response_format", [])),
572
- ),
573
- include_usage=(
574
- bool,
575
- Field(default=compatibility_options.get("include_usage", True)),
576
- ),
577
- )
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import AsyncIterator, Iterator
4
+ from json import JSONDecodeError
5
+ from typing import (
6
+ Any,
7
+ Callable,
8
+ List,
9
+ Literal,
10
+ Optional,
11
+ Sequence,
12
+ Type,
13
+ TypeVar,
14
+ Union,
15
+ )
16
+
17
+ import openai
18
+ from langchain_core.callbacks import (
19
+ AsyncCallbackManagerForLLMRun,
20
+ CallbackManagerForLLMRun,
21
+ )
22
+ from langchain_core.language_models import LangSmithParams, LanguageModelInput
23
+ from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
24
+ from langchain_core.outputs import ChatGenerationChunk, ChatResult
25
+ from langchain_core.runnables import Runnable
26
+ from langchain_core.tools import BaseTool
27
+ from langchain_core.utils import from_env, secret_from_env
28
+ from langchain_core.utils.function_calling import convert_to_openai_tool
29
+ from langchain_openai.chat_models._compat import _convert_from_v1_to_chat_completions
30
+ from langchain_openai.chat_models.base import BaseChatOpenAI, _convert_message_to_dict
31
+ from pydantic import (
32
+ BaseModel,
33
+ ConfigDict,
34
+ Field,
35
+ PrivateAttr,
36
+ SecretStr,
37
+ create_model,
38
+ model_validator,
39
+ )
40
+ from typing_extensions import Self
41
+
42
+ from ..types import (
43
+ CompatibilityOptions,
44
+ ReasoningKeepPolicy,
45
+ ResponseFormatType,
46
+ ToolChoiceType,
47
+ )
48
+
49
+ _BM = TypeVar("_BM", bound=BaseModel)
50
+ _DictOrPydanticClass = Union[dict[str, Any], type[_BM], type]
51
+ _DictOrPydantic = Union[dict, _BM]
52
+
53
+
54
+ def _get_last_human_message_index(messages: list[BaseMessage]) -> int:
55
+ """find the index of the last HumanMessage in the messages list, return -1 if not found."""
56
+ return next(
57
+ (
58
+ i
59
+ for i in range(len(messages) - 1, -1, -1)
60
+ if isinstance(messages[i], HumanMessage)
61
+ ),
62
+ -1,
63
+ )
64
+
65
+
66
+ class _BaseChatOpenAICompatible(BaseChatOpenAI):
67
+ """
68
+ Base template class for OpenAI-compatible chat model implementations.
69
+
70
+ This class provides a foundation for integrating various LLM providers that
71
+ offer OpenAI-compatible APIs (such as vLLM, OpenRouter, ZAI, Moonshot,
72
+ and many others). It enhances the base OpenAI functionality by:
73
+
74
+ **1. Supports output of more types of reasoning content (reasoning_content)**
75
+ ChatOpenAI can only output reasoning content natively supported by official
76
+ OpenAI models, while OpenAICompatibleChatModel can output reasoning content
77
+ from other model providers (e.g., OpenRouter, vLLM).
78
+
79
+ **2. Dynamically adapts to choose the most suitable structured-output method**
80
+ OpenAICompatibleChatModel adds method="auto" (default), which selects the best
81
+ structured-output method (function_calling or json_schema) based on the actual
82
+ capabilities of the model provider.
83
+
84
+ **3. Supports configuration of provider compatibility options**
85
+ For cases where parameters differ from the official OpenAI API, this library
86
+ provides the compatibility_options parameter to address this issue. For
87
+ example, when different model providers have inconsistent support for
88
+ tool_choice, you can adapt by setting supported_tool_choice in
89
+ compatibility_options.
90
+
91
+ Built on top of `langchain-openai`'s `BaseChatOpenAI`, this template class
92
+ extends capabilities to better support diverse OpenAI-compatible model
93
+ providers while maintaining full compatibility with LangChain's chat model
94
+ interface.
95
+
96
+ Note: This is a template class and should not be exported or instantiated
97
+ directly. Instead, use it as a base class and provide the specific provider
98
+ name through inheritance or the factory function
99
+ `_create_openai_compatible_model()`.
100
+ """
101
+
102
+ model_name: str = Field(alias="model", default="openai compatible model")
103
+ """The name of the model"""
104
+ api_key: Optional[SecretStr] = Field(
105
+ default_factory=secret_from_env("OPENAI_COMPATIBLE_API_KEY", default=None),
106
+ )
107
+ """OpenAI Compatible API key"""
108
+ api_base: str = Field(
109
+ default_factory=from_env("OPENAI_COMPATIBLE_API_BASE", default=""),
110
+ )
111
+ """OpenAI Compatible API base URL"""
112
+
113
+ model_config = ConfigDict(populate_by_name=True)
114
+
115
+ _provider: str = PrivateAttr(default="openai-compatible")
116
+
117
+ """Provider Compatibility Options"""
118
+ supported_tool_choice: ToolChoiceType = Field(default_factory=list)
119
+ """Supported tool choice"""
120
+ supported_response_format: ResponseFormatType = Field(default_factory=list)
121
+ """Supported response format"""
122
+ reasoning_keep_policy: ReasoningKeepPolicy = Field(default="never")
123
+ """How to keep reasoning content in the messages"""
124
+ include_usage: bool = Field(default=True)
125
+ """Whether to include usage information in the output"""
126
+
127
+ @property
128
+ def _llm_type(self) -> str:
129
+ return f"chat-{self._provider}"
130
+
131
+ @property
132
+ def lc_secrets(self) -> dict[str, str]:
133
+ return {"api_key": f"{self._provider.upper()}_API_KEY"}
134
+
135
+ def _get_ls_params(
136
+ self,
137
+ stop: Optional[list[str]] = None,
138
+ **kwargs: Any,
139
+ ) -> LangSmithParams:
140
+ ls_params = super()._get_ls_params(stop=stop, **kwargs)
141
+ ls_params["ls_provider"] = self._provider
142
+ return ls_params
143
+
144
+ def _get_request_payload(
145
+ self,
146
+ input_: LanguageModelInput,
147
+ *,
148
+ stop: list[str] | None = None,
149
+ **kwargs: Any,
150
+ ) -> dict:
151
+ payload = {**self._default_params, **kwargs}
152
+
153
+ if self._use_responses_api(payload):
154
+ return super()._get_request_payload(input_, stop=stop, **kwargs)
155
+
156
+ messages = self._convert_input(input_).to_messages()
157
+ if stop is not None:
158
+ kwargs["stop"] = stop
159
+
160
+ payload_messages = []
161
+ last_human_index = -1
162
+ if self.reasoning_keep_policy == "current":
163
+ last_human_index = _get_last_human_message_index(messages)
164
+
165
+ for index, m in enumerate(messages):
166
+ if isinstance(m, AIMessage):
167
+ msg_dict = _convert_message_to_dict(
168
+ _convert_from_v1_to_chat_completions(m)
169
+ )
170
+ if self.reasoning_keep_policy == "all" and m.additional_kwargs.get(
171
+ "reasoning_content"
172
+ ):
173
+ msg_dict["reasoning_content"] = m.additional_kwargs.get(
174
+ "reasoning_content"
175
+ )
176
+ elif (
177
+ self.reasoning_keep_policy == "current"
178
+ and index > last_human_index
179
+ and m.additional_kwargs.get("reasoning_content")
180
+ ):
181
+ msg_dict["reasoning_content"] = m.additional_kwargs.get(
182
+ "reasoning_content"
183
+ )
184
+ payload_messages.append(msg_dict)
185
+ else:
186
+ payload_messages.append(_convert_message_to_dict(m))
187
+
188
+ payload["messages"] = payload_messages
189
+ return payload
190
+
191
+ @model_validator(mode="after")
192
+ def validate_environment(self) -> Self:
193
+ if not (self.api_key and self.api_key.get_secret_value()):
194
+ msg = f"{self._provider.upper()}_API_KEY must be set."
195
+ raise ValueError(msg)
196
+ client_params: dict = {
197
+ k: v
198
+ for k, v in {
199
+ "api_key": self.api_key.get_secret_value() if self.api_key else None,
200
+ "base_url": self.api_base,
201
+ "timeout": self.request_timeout,
202
+ "max_retries": self.max_retries,
203
+ "default_headers": self.default_headers,
204
+ "default_query": self.default_query,
205
+ }.items()
206
+ if v is not None
207
+ }
208
+
209
+ if not (self.client or None):
210
+ sync_specific: dict = {"http_client": self.http_client}
211
+ self.root_client = openai.OpenAI(**client_params, **sync_specific)
212
+ self.client = self.root_client.chat.completions
213
+ if not (self.async_client or None):
214
+ async_specific: dict = {"http_client": self.http_async_client}
215
+ self.root_async_client = openai.AsyncOpenAI(
216
+ **client_params,
217
+ **async_specific,
218
+ )
219
+ self.async_client = self.root_async_client.chat.completions
220
+ return self
221
+
222
+ @model_validator(mode="after")
223
+ def _set_model_profile(self) -> Self:
224
+ """Set model profile if not overridden."""
225
+ if self.profile is None:
226
+ self.profile = {}
227
+ return self
228
+
229
+ def _create_chat_result(
230
+ self,
231
+ response: Union[dict, openai.BaseModel],
232
+ generation_info: Optional[dict] = None,
233
+ ) -> ChatResult:
234
+ """Convert API response to LangChain ChatResult with enhanced content processing.
235
+
236
+ Extends base implementation to capture and preserve reasoning content from
237
+ model responses, supporting advanced models that provide reasoning chains
238
+ or thought processes alongside regular responses.
239
+
240
+ Handles multiple response formats:
241
+ - Standard OpenAI response objects with `reasoning_content` attribute
242
+ - Responses with `model_extra` containing reasoning data
243
+ - Dictionary responses (pass-through to base implementation)
244
+
245
+ Args:
246
+ response: Raw API response (OpenAI object or dict)
247
+ generation_info: Additional generation metadata
248
+
249
+ Returns:
250
+ ChatResult with enhanced message containing reasoning content when available
251
+ """
252
+ rtn = super()._create_chat_result(response, generation_info)
253
+
254
+ if not isinstance(response, openai.BaseModel):
255
+ return rtn
256
+
257
+ for generation in rtn.generations:
258
+ if generation.message.response_metadata is None:
259
+ generation.message.response_metadata = {}
260
+ generation.message.response_metadata["model_provider"] = "openai-compatible"
261
+
262
+ choices = getattr(response, "choices", None)
263
+ if choices and hasattr(choices[0].message, "reasoning_content"):
264
+ rtn.generations[0].message.additional_kwargs["reasoning_content"] = choices[
265
+ 0
266
+ ].message.reasoning_content
267
+ elif choices and hasattr(choices[0].message, "model_extra"):
268
+ model_extra = choices[0].message.model_extra
269
+ if isinstance(model_extra, dict) and (
270
+ reasoning := model_extra.get("reasoning")
271
+ ):
272
+ rtn.generations[0].message.additional_kwargs["reasoning_content"] = (
273
+ reasoning
274
+ )
275
+
276
+ return rtn
277
+
278
+ def _convert_chunk_to_generation_chunk(
279
+ self,
280
+ chunk: dict,
281
+ default_chunk_class: type,
282
+ base_generation_info: Optional[dict],
283
+ ) -> Optional[ChatGenerationChunk]:
284
+ """Convert streaming chunk to generation chunk with reasoning content support.
285
+
286
+ Processes streaming response chunks to extract reasoning content alongside
287
+ regular message content, enabling real-time streaming of both response
288
+ text and reasoning chains from compatible models.
289
+
290
+ Args:
291
+ chunk: Raw streaming chunk from API
292
+ default_chunk_class: Expected chunk type for validation
293
+ base_generation_info: Base metadata for the generation
294
+
295
+ Returns:
296
+ ChatGenerationChunk with reasoning content when present in chunk data
297
+ """
298
+ generation_chunk = super()._convert_chunk_to_generation_chunk(
299
+ chunk,
300
+ default_chunk_class,
301
+ base_generation_info,
302
+ )
303
+ if (choices := chunk.get("choices")) and generation_chunk:
304
+ top = choices[0]
305
+ if isinstance(generation_chunk.message, AIMessageChunk):
306
+ generation_chunk.message.response_metadata = {
307
+ **generation_chunk.message.response_metadata,
308
+ "model_provider": "openai-compatible",
309
+ }
310
+ if (
311
+ reasoning_content := top.get("delta", {}).get("reasoning_content")
312
+ ) is not None:
313
+ generation_chunk.message.additional_kwargs["reasoning_content"] = (
314
+ reasoning_content
315
+ )
316
+ elif (reasoning := top.get("delta", {}).get("reasoning")) is not None:
317
+ generation_chunk.message.additional_kwargs["reasoning_content"] = (
318
+ reasoning
319
+ )
320
+
321
+ return generation_chunk
322
+
323
+ def _stream(
324
+ self,
325
+ messages: List[BaseMessage],
326
+ stop: Optional[List[str]] = None,
327
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
328
+ **kwargs: Any,
329
+ ) -> Iterator[ChatGenerationChunk]:
330
+ if self._use_responses_api({**kwargs, **self.model_kwargs}):
331
+ for chunk in super()._stream_responses(
332
+ messages, stop=stop, run_manager=run_manager, **kwargs
333
+ ):
334
+ yield chunk
335
+ else:
336
+ if self.include_usage:
337
+ kwargs["stream_options"] = {"include_usage": True}
338
+ try:
339
+ for chunk in super()._stream(
340
+ messages, stop=stop, run_manager=run_manager, **kwargs
341
+ ):
342
+ yield chunk
343
+ except JSONDecodeError as e:
344
+ raise JSONDecodeError(
345
+ f"{self._provider.title()} API returned an invalid response. "
346
+ "Please check the API status and try again.",
347
+ e.doc,
348
+ e.pos,
349
+ ) from e
350
+
351
+ async def _astream(
352
+ self,
353
+ messages: List[BaseMessage],
354
+ stop: Optional[List[str]] = None,
355
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
356
+ **kwargs: Any,
357
+ ) -> AsyncIterator[ChatGenerationChunk]:
358
+ if self._use_responses_api({**kwargs, **self.model_kwargs}):
359
+ async for chunk in super()._astream_responses(
360
+ messages, stop=stop, run_manager=run_manager, **kwargs
361
+ ):
362
+ yield chunk
363
+ else:
364
+ if self.include_usage:
365
+ kwargs["stream_options"] = {"include_usage": True}
366
+ try:
367
+ async for chunk in super()._astream(
368
+ messages, stop=stop, run_manager=run_manager, **kwargs
369
+ ):
370
+ yield chunk
371
+ except JSONDecodeError as e:
372
+ raise JSONDecodeError(
373
+ f"{self._provider.title()} API returned an invalid response. "
374
+ "Please check the API status and try again.",
375
+ e.doc,
376
+ e.pos,
377
+ ) from e
378
+
379
+ def _generate(
380
+ self,
381
+ messages: List[BaseMessage],
382
+ stop: Optional[List[str]] = None,
383
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
384
+ **kwargs: Any,
385
+ ) -> ChatResult:
386
+ try:
387
+ return super()._generate(
388
+ messages, stop=stop, run_manager=run_manager, **kwargs
389
+ )
390
+ except JSONDecodeError as e:
391
+ raise JSONDecodeError(
392
+ f"{self._provider.title()} API returned an invalid response. "
393
+ "Please check the API status and try again.",
394
+ e.doc,
395
+ e.pos,
396
+ ) from e
397
+
398
+ async def _agenerate(
399
+ self,
400
+ messages: List[BaseMessage],
401
+ stop: Optional[List[str]] = None,
402
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
403
+ **kwargs: Any,
404
+ ) -> ChatResult:
405
+ try:
406
+ return await super()._agenerate(
407
+ messages, stop=stop, run_manager=run_manager, **kwargs
408
+ )
409
+ except JSONDecodeError as e:
410
+ raise JSONDecodeError(
411
+ f"{self._provider.title()} API returned an invalid response. "
412
+ "Please check the API status and try again.",
413
+ e.doc,
414
+ e.pos,
415
+ ) from e
416
+
417
+ def bind_tools(
418
+ self,
419
+ tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
420
+ *,
421
+ tool_choice: dict | str | bool | None = None,
422
+ strict: bool | None = None,
423
+ parallel_tool_calls: bool | None = None,
424
+ **kwargs: Any,
425
+ ) -> Runnable[LanguageModelInput, AIMessage]:
426
+ if parallel_tool_calls is not None:
427
+ kwargs["parallel_tool_calls"] = parallel_tool_calls
428
+ formatted_tools = [
429
+ convert_to_openai_tool(tool, strict=strict) for tool in tools
430
+ ]
431
+
432
+ tool_names = []
433
+ for tool in formatted_tools:
434
+ if "function" in tool:
435
+ tool_names.append(tool["function"]["name"])
436
+ elif "name" in tool:
437
+ tool_names.append(tool["name"])
438
+ else:
439
+ pass
440
+
441
+ support_tool_choice = False
442
+ if tool_choice is not None:
443
+ if isinstance(tool_choice, bool):
444
+ tool_choice = "required"
445
+ if isinstance(tool_choice, str):
446
+ if (
447
+ tool_choice in ["auto", "none", "required"]
448
+ and tool_choice in self.supported_tool_choice
449
+ ):
450
+ support_tool_choice = True
451
+
452
+ elif "specific" in self.supported_tool_choice:
453
+ if tool_choice in tool_names:
454
+ support_tool_choice = True
455
+ tool_choice = {
456
+ "type": "function",
457
+ "function": {"name": tool_choice},
458
+ }
459
+ tool_choice = tool_choice if support_tool_choice else None
460
+ if tool_choice:
461
+ kwargs["tool_choice"] = tool_choice
462
+ return super().bind(tools=formatted_tools, **kwargs)
463
+
464
+ def with_structured_output(
465
+ self,
466
+ schema: Optional[_DictOrPydanticClass] = None,
467
+ *,
468
+ method: Literal[
469
+ "auto",
470
+ "function_calling",
471
+ "json_mode",
472
+ "json_schema",
473
+ ] = "auto",
474
+ include_raw: bool = False,
475
+ strict: Optional[bool] = None,
476
+ **kwargs: Any,
477
+ ) -> Runnable[LanguageModelInput, _DictOrPydantic]:
478
+ """Configure structured output extraction with provider compatibility handling.
479
+
480
+ Enables parsing of model outputs into structured formats (Pydantic models
481
+ or dictionaries) while handling provider-specific method compatibility.
482
+ Falls back from json_schema to function_calling for providers that don't
483
+ support the json_schema method.
484
+
485
+ Args:
486
+ schema: Output schema (Pydantic model class or dictionary definition)
487
+ method: Extraction method; defaults to "auto", which chooses the best method based on the provider's supported response formats
488
+ include_raw: Whether to include raw model response alongside parsed output
489
+ strict: Schema enforcement strictness (provider-dependent)
490
+ **kwargs: Additional structured output parameters
491
+
492
+ Returns:
493
+ Runnable configured for structured output extraction
494
+ """
495
+ if method not in ["auto", "function_calling", "json_mode", "json_schema"]:
496
+ raise ValueError(
497
+ f"Unsupported method: {method}. Please choose from 'auto', 'function_calling', 'json_mode', 'json_schema'."
498
+ )
499
+ if method == "auto":
500
+ if "json_schema" in self.supported_response_format:
501
+ method = "json_schema"
502
+ else:
503
+ method = "function_calling"
504
+ elif (
505
+ method == "json_schema"
506
+ and "json_schema" not in self.supported_response_format
507
+ ):
508
+ method = "function_calling"
509
+ elif (
510
+ method == "json_mode" and "json_mode" not in self.supported_response_format
511
+ ):
512
+ method = "function_calling"
513
+
514
+ return super().with_structured_output(
515
+ schema,
516
+ method=method,
517
+ include_raw=include_raw,
518
+ strict=strict,
519
+ **kwargs,
520
+ )
521
+
522
+
523
+ def _create_openai_compatible_model(
524
+ provider: str,
525
+ base_url: str,
526
+ compatibility_options: Optional[CompatibilityOptions] = None,
527
+ ) -> Type[_BaseChatOpenAICompatible]:
528
+ """Factory function for creating provider-specific OpenAI-compatible model classes.
529
+
530
+ Dynamically generates model classes for different OpenAI-compatible providers,
531
+ configuring environment variable mappings and default base URLs specific to each provider.
532
+
533
+ Args:
534
+ provider: Provider identifier (e.g., `vllm`, `openrouter`)
535
+ base_url: Default API base URL for the provider
536
+ compatibility_options: Optional configuration for the provider
537
+
538
+ Returns:
539
+ Configured model class ready for instantiation with provider-specific settings
540
+ """
541
+ chat_model_cls_name = f"Chat{provider.title()}"
542
+ if compatibility_options is None:
543
+ compatibility_options = {}
544
+
545
+ return create_model(
546
+ chat_model_cls_name,
547
+ __base__=_BaseChatOpenAICompatible,
548
+ api_base=(
549
+ str,
550
+ Field(
551
+ default_factory=from_env(
552
+ f"{provider.upper()}_API_BASE", default=base_url
553
+ ),
554
+ ),
555
+ ),
556
+ api_key=(
557
+ str,
558
+ Field(
559
+ default_factory=secret_from_env(
560
+ f"{provider.upper()}_API_KEY", default=None
561
+ ),
562
+ ),
563
+ ),
564
+ _provider=(
565
+ str,
566
+ PrivateAttr(default=provider),
567
+ ),
568
+ supported_tool_choice=(
569
+ ToolChoiceType,
570
+ Field(default=compatibility_options.get("supported_tool_choice", ["auto"])),
571
+ ),
572
+ reasoning_keep_policy=(
573
+ ReasoningKeepPolicy,
574
+ Field(default=compatibility_options.get("reasoning_keep_policy", "never")),
575
+ ),
576
+ supported_response_format=(
577
+ ResponseFormatType,
578
+ Field(default=compatibility_options.get("supported_response_format", [])),
579
+ ),
580
+ include_usage=(
581
+ bool,
582
+ Field(default=compatibility_options.get("include_usage", True)),
583
+ ),
584
+ )
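
The private factory at the end of the module is how provider-specific classes are meant to be minted. A minimal usage sketch follows; it is not part of the package, the provider name, base URL, and model id are placeholders, and only the compatibility_options keys that appear in the code above are used.

    # Hedged sketch: mint a chat model class for a hypothetical OpenAI-compatible
    # provider. All identifiers below are placeholders, not values from the package.
    ChatExampleProvider = _create_openai_compatible_model(
        provider="exampleprovider",
        base_url="https://api.example-provider.invalid/v1",  # placeholder URL
        compatibility_options={
            "supported_tool_choice": ["auto", "none", "required", "specific"],
            "supported_response_format": ["json_schema"],
            "reasoning_keep_policy": "current",
            "include_usage": True,
        },
    )

    # validate_environment() reads the key from f"{provider.upper()}_API_KEY",
    # so EXAMPLEPROVIDER_API_KEY must be set before instantiation.
    llm = ChatExampleProvider(model="example-model")

The reasoning_keep_policy value in the sketch uses the names introduced in 1.2.10: "never" drops prior reasoning_content from the request payload, "all" re-attaches it to every AIMessage, and "current" keeps it only for AI messages after the last HumanMessage (these replace "discard", "retain", and "temp" from 1.2.8).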
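
Structured output goes through the same generated class. A hedged sketch, reusing the llm instance from the sketch above: because supported_response_format advertises "json_schema", the default method="auto" resolves to "json_schema"; with an empty list it would fall back to "function_calling".

    from pydantic import BaseModel

    class Answer(BaseModel):
        """Toy schema for the structured-output sketch."""
        text: str
        confidence: float

    # method defaults to "auto"; the resolution against supported_response_format
    # happens inside with_structured_output, so no extra arguments are needed here.
    structured_llm = llm.with_structured_output(Answer)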