langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of langchain-core might be problematic.

Files changed (74)
  1. langchain_core/_api/beta_decorator.py +2 -2
  2. langchain_core/_api/deprecation.py +1 -1
  3. langchain_core/beta/runnables/context.py +1 -1
  4. langchain_core/callbacks/base.py +14 -23
  5. langchain_core/callbacks/file.py +13 -2
  6. langchain_core/callbacks/manager.py +74 -157
  7. langchain_core/callbacks/streaming_stdout.py +3 -4
  8. langchain_core/callbacks/usage.py +2 -12
  9. langchain_core/chat_history.py +6 -6
  10. langchain_core/documents/base.py +1 -1
  11. langchain_core/documents/compressor.py +9 -6
  12. langchain_core/indexing/base.py +2 -2
  13. langchain_core/language_models/_utils.py +230 -101
  14. langchain_core/language_models/base.py +35 -23
  15. langchain_core/language_models/chat_models.py +245 -53
  16. langchain_core/language_models/fake_chat_models.py +28 -81
  17. langchain_core/load/dump.py +3 -4
  18. langchain_core/messages/__init__.py +38 -22
  19. langchain_core/messages/ai.py +188 -30
  20. langchain_core/messages/base.py +164 -25
  21. langchain_core/messages/block_translators/__init__.py +89 -0
  22. langchain_core/messages/block_translators/anthropic.py +451 -0
  23. langchain_core/messages/block_translators/bedrock.py +45 -0
  24. langchain_core/messages/block_translators/bedrock_converse.py +47 -0
  25. langchain_core/messages/block_translators/google_genai.py +45 -0
  26. langchain_core/messages/block_translators/google_vertexai.py +47 -0
  27. langchain_core/messages/block_translators/groq.py +45 -0
  28. langchain_core/messages/block_translators/langchain_v0.py +297 -0
  29. langchain_core/messages/block_translators/ollama.py +45 -0
  30. langchain_core/messages/block_translators/openai.py +586 -0
  31. langchain_core/messages/{content_blocks.py → content.py} +346 -213
  32. langchain_core/messages/human.py +29 -9
  33. langchain_core/messages/system.py +29 -9
  34. langchain_core/messages/tool.py +94 -13
  35. langchain_core/messages/utils.py +32 -234
  36. langchain_core/output_parsers/base.py +14 -50
  37. langchain_core/output_parsers/json.py +2 -5
  38. langchain_core/output_parsers/list.py +2 -7
  39. langchain_core/output_parsers/openai_functions.py +5 -28
  40. langchain_core/output_parsers/openai_tools.py +49 -90
  41. langchain_core/output_parsers/pydantic.py +2 -3
  42. langchain_core/output_parsers/transform.py +12 -53
  43. langchain_core/output_parsers/xml.py +9 -17
  44. langchain_core/prompt_values.py +8 -112
  45. langchain_core/prompts/chat.py +1 -3
  46. langchain_core/runnables/base.py +500 -451
  47. langchain_core/runnables/branch.py +1 -1
  48. langchain_core/runnables/fallbacks.py +4 -4
  49. langchain_core/runnables/history.py +1 -1
  50. langchain_core/runnables/passthrough.py +3 -3
  51. langchain_core/runnables/retry.py +1 -1
  52. langchain_core/runnables/router.py +1 -1
  53. langchain_core/structured_query.py +3 -7
  54. langchain_core/tools/base.py +14 -41
  55. langchain_core/tools/convert.py +2 -22
  56. langchain_core/tools/retriever.py +1 -8
  57. langchain_core/tools/structured.py +2 -10
  58. langchain_core/tracers/_streaming.py +6 -7
  59. langchain_core/tracers/base.py +7 -14
  60. langchain_core/tracers/core.py +4 -27
  61. langchain_core/tracers/event_stream.py +4 -15
  62. langchain_core/tracers/langchain.py +3 -14
  63. langchain_core/tracers/log_stream.py +2 -3
  64. langchain_core/utils/_merge.py +45 -7
  65. langchain_core/utils/function_calling.py +22 -9
  66. langchain_core/utils/utils.py +29 -0
  67. langchain_core/version.py +1 -1
  68. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/METADATA +7 -9
  69. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/RECORD +71 -64
  70. langchain_core/v1/__init__.py +0 -1
  71. langchain_core/v1/chat_models.py +0 -1047
  72. langchain_core/v1/messages.py +0 -755
  73. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/WHEEL +0 -0
  74. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/entry_points.txt +0 -0
langchain_core/language_models/chat_models.py +245 -53

@@ -27,7 +27,10 @@ from langchain_core.callbacks import (
     Callbacks,
 )
 from langchain_core.globals import get_llm_cache
-from langchain_core.language_models._utils import _normalize_messages
+from langchain_core.language_models._utils import (
+    _normalize_messages,
+    _update_message_content_to_blocks,
+)
 from langchain_core.language_models.base import (
     BaseLanguageModel,
     LangSmithParams,
@@ -36,16 +39,16 @@ from langchain_core.language_models.base import (
 from langchain_core.load import dumpd, dumps
 from langchain_core.messages import (
     AIMessage,
+    AIMessageChunk,
     AnyMessage,
     BaseMessage,
-    BaseMessageChunk,
     HumanMessage,
     convert_to_messages,
+    convert_to_openai_data_block,
     convert_to_openai_image_block,
     is_data_content_block,
     message_chunk_to_message,
 )
-from langchain_core.messages.ai import _LC_ID_PREFIX
 from langchain_core.outputs import (
     ChatGeneration,
     ChatGenerationChunk,
@@ -65,6 +68,7 @@ from langchain_core.utils.function_calling import (
     convert_to_openai_tool,
 )
 from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
+from langchain_core.utils.utils import LC_ID_PREFIX, from_env
 
 if TYPE_CHECKING:
     import uuid
@@ -78,6 +82,11 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
     if hasattr(error, "response"):
         response = error.response
         metadata: dict = {}
+        if hasattr(response, "json"):
+            try:
+                metadata["body"] = response.json()
+            except Exception:
+                metadata["body"] = getattr(response, "text", None)
         if hasattr(response, "headers"):
             try:
                 metadata["headers"] = dict(response.headers)
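
The added branch captures the response body alongside the headers already collected. A sketch with hypothetical stand-ins (not part of the diff) for the shape it inspects; a real error.response would come from an HTTP client such as httpx:

    class _FakeResponse:
        text = '{"error": "rate_limited"}'
        headers = {"retry-after": "1"}

        def json(self) -> dict:
            return {"error": "rate_limited"}

    class _FakeHTTPError(Exception):
        response = _FakeResponse()

    # For an error like _FakeHTTPError, the new lines record
    # metadata["body"] = {"error": "rate_limited"}; if .json() raised,
    # the fallback would store the raw .text string instead.
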
@@ -97,17 +106,18 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
 
 
 def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
-    """Format messages for tracing in on_chat_model_start.
+    """Format messages for tracing in ``on_chat_model_start``.
 
     - Update image content blocks to OpenAI Chat Completions format (backward
       compatibility).
-    - Add "type" key to content blocks that have a single key.
+    - Add ``type`` key to content blocks that have a single key.
 
     Args:
         messages: List of messages to format.
 
     Returns:
         List of messages formatted for tracing.
+
     """
     messages_to_trace = []
     for message in messages:
@@ -119,7 +129,7 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                 if (
                     block.get("type") == "image"
                     and is_data_content_block(block)
-                    and block.get("source_type") != "id"
+                    and not ("file_id" in block or block.get("source_type") == "id")
                 ):
                     if message_to_trace is message:
                         # Shallow copy
@@ -129,6 +139,19 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                     message_to_trace.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
                         convert_to_openai_image_block(block)
                     )
+                elif (
+                    block.get("type") == "file"
+                    and is_data_content_block(block)
+                    and "base64" in block
+                ):
+                    if message_to_trace is message:
+                        # Shallow copy
+                        message_to_trace = message.model_copy()
+                        message_to_trace.content = list(message_to_trace.content)
+
+                    message_to_trace.content[idx] = convert_to_openai_data_block(  # type: ignore[index]
+                        block
+                    )
                 elif len(block) == 1 and "type" not in block:
                     # Tracing assumes all content blocks have a "type" key. Here
                     # we add this key if it is missing, and there's an obvious
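
For reference, a sketch of the kind of content block the new elif targets (assumed v1 data-block shape; the base64 payload is a truncated placeholder):

    file_block = {
        "type": "file",
        "base64": "JVBERi0xLjcKJb/3ov4K...",  # truncated placeholder
        "mime_type": "application/pdf",
    }
    # is_data_content_block(file_block) is True and "base64" is present, so
    # tracing routes it through convert_to_openai_data_block(file_block).
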
@@ -153,10 +176,11 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
     """Generate from a stream.
 
     Args:
-        stream: Iterator of ChatGenerationChunk.
+        stream: Iterator of ``ChatGenerationChunk``.
 
     Returns:
         ChatResult: Chat result.
+
     """
     generation = next(stream, None)
     if generation:
@@ -180,10 +204,11 @@ async def agenerate_from_stream(
     """Async generate from a stream.
 
     Args:
-        stream: Iterator of ChatGenerationChunk.
+        stream: Iterator of ``ChatGenerationChunk``.
 
     Returns:
         ChatResult: Chat result.
+
     """
     chunks = [chunk async for chunk in stream]
     return await run_in_executor(None, generate_from_stream, iter(chunks))
@@ -208,7 +233,7 @@ def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) ->
     return ls_structured_output_format_dict
 
 
-class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
+class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
     """Base class for chat models.
 
     Key imperative methods:
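
In practical terms, the narrowed generic means type checkers now see AIMessage rather than BaseMessage from the imperative methods. A sketch, with model standing in for any concrete BaseChatModel:

    from langchain_core.messages import AIMessage, AIMessageChunk

    msg: AIMessage = model.invoke("hello")  # was typed BaseMessage before
    print(msg.tool_calls)                   # AIMessage-only attributes, no cast

    for chunk in model.stream("hello"):     # was Iterator[BaseMessageChunk]
        print(chunk.content, end="")        # chunk is an AIMessageChunk
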
@@ -311,15 +336,38 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
       provided. This offers the best of both worlds.
     - If False (default), will always use streaming case if available.
 
-    The main reason for this flag is that code might be written using ``.stream()`` and
+    The main reason for this flag is that code might be written using ``stream()`` and
     a user may want to swap out a given model for another model whose the implementation
     does not properly support streaming.
+
+    """
+
+    output_version: Optional[str] = Field(
+        default_factory=from_env("LC_OUTPUT_VERSION", default=None)
+    )
+    """Version of ``AIMessage`` output format to store in message content.
+
+    ``AIMessage.content_blocks`` will lazily parse the contents of ``content`` into a
+    standard format. This flag can be used to additionally store the standard format
+    in message content, e.g., for serialization purposes.
+
+    Supported values:
+
+    - ``"v0"``: provider-specific format in content (can lazily-parse with
+      ``.content_blocks``)
+    - ``"v1"``: standardized format in content (consistent with ``.content_blocks``)
+
+    Partner packages (e.g., ``langchain-openai``) can also use this field to roll out
+    new content formats in a backward-compatible way.
+
+    .. versionadded:: 1.0
+
     """
 
     @model_validator(mode="before")
     @classmethod
     def raise_deprecation(cls, values: dict) -> Any:
-        """Raise deprecation warning if callback_manager is used.
+        """Raise deprecation warning if ``callback_manager`` is used.
 
         Args:
             values (Dict): Values to validate.
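
A sketch of how the new flag might be used (MyChatModel is a hypothetical BaseChatModel subclass; the environment-variable default comes from the from_env("LC_OUTPUT_VERSION", default=None) factory above):

    import os

    os.environ["LC_OUTPUT_VERSION"] = "v1"
    model = MyChatModel()                     # picks up "v1" from the environment
    assert model.output_version == "v1"

    model = MyChatModel(output_version="v0")  # explicit value overrides the env var
    msg = model.invoke("hello")
    blocks = msg.content_blocks  # standard blocks, lazily parsed from v0 content
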
@@ -328,7 +376,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             Dict: Validated values.
 
         Raises:
-            DeprecationWarning: If callback_manager is used.
+            DeprecationWarning: If ``callback_manager`` is used.
+
         """
         if values.get("callback_manager") is not None:
             warnings.warn(
@@ -376,21 +425,24 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> BaseMessage:
+    ) -> AIMessage:
         config = ensure_config(config)
         return cast(
-            "ChatGeneration",
-            self.generate_prompt(
-                [self._convert_input(input)],
-                stop=stop,
-                callbacks=config.get("callbacks"),
-                tags=config.get("tags"),
-                metadata=config.get("metadata"),
-                run_name=config.get("run_name"),
-                run_id=config.pop("run_id", None),
-                **kwargs,
-            ).generations[0][0],
-        ).message
+            "AIMessage",
+            cast(
+                "ChatGeneration",
+                self.generate_prompt(
+                    [self._convert_input(input)],
+                    stop=stop,
+                    callbacks=config.get("callbacks"),
+                    tags=config.get("tags"),
+                    metadata=config.get("metadata"),
+                    run_name=config.get("run_name"),
+                    run_id=config.pop("run_id", None),
+                    **kwargs,
+                ).generations[0][0],
+            ).message,
+        )
 
     @override
     async def ainvoke(
@@ -400,7 +452,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> BaseMessage:
+    ) -> AIMessage:
         config = ensure_config(config)
         llm_result = await self.agenerate_prompt(
             [self._convert_input(input)],
@@ -412,7 +464,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             run_id=config.pop("run_id", None),
             **kwargs,
         )
-        return cast("ChatGeneration", llm_result.generations[0][0]).message
+        return cast(
+            "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message
+        )
 
     def _should_stream(
         self,
@@ -457,11 +511,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> Iterator[BaseMessageChunk]:
+    ) -> Iterator[AIMessageChunk]:
         if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):
             # model doesn't implement streaming, so use default implementation
             yield cast(
-                "BaseMessageChunk",
+                "AIMessageChunk",
                 self.invoke(input, config=config, stop=stop, **kwargs),
             )
         else:
@@ -506,16 +560,41 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
             try:
                 input_messages = _normalize_messages(messages)
-                run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
+                run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
+                yielded = False
                 for chunk in self._stream(input_messages, stop=stop, **kwargs):
                     if chunk.message.id is None:
                         chunk.message.id = run_id
                     chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                    if self.output_version == "v1":
+                        # Overwrite .content with .content_blocks
+                        chunk.message = _update_message_content_to_blocks(
+                            chunk.message, "v1"
+                        )
                     run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                     chunks.append(chunk)
-                    yield chunk.message
+                    yield cast("AIMessageChunk", chunk.message)
+                    yielded = True
+
+                # Yield a final empty chunk with chunk_position="last" if not yet
+                # yielded
+                if (
+                    yielded
+                    and isinstance(chunk.message, AIMessageChunk)
+                    and not chunk.message.chunk_position
+                ):
+                    empty_content: Union[str, list] = (
+                        "" if isinstance(chunk.message.content, str) else []
+                    )
+                    msg_chunk = AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                    run_manager.on_llm_new_token(
+                        "", chunk=ChatGenerationChunk(message=msg_chunk)
+                    )
+                    yield msg_chunk
             except BaseException as e:
                 generations_with_error_metadata = _generate_response_from_error(e)
                 chat_generation_chunk = merge_chat_generation_chunks(chunks)
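
From the consumer side, the effect is that a non-empty stream now terminates with an explicit sentinel chunk. A sketch, with model standing in for any streaming BaseChatModel:

    chunks = list(model.stream("hello"))
    # Unless the provider already marked a final chunk, the stream ends with an
    # empty AIMessageChunk whose chunk_position is "last".
    assert chunks[-1].chunk_position == "last"
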
@@ -528,7 +607,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                     generations = [generations_with_error_metadata]
                 run_manager.on_llm_error(
                     e,
-                    response=LLMResult(generations=generations),  # type: ignore[arg-type]
+                    response=LLMResult(generations=generations),
                 )
                 raise
 
@@ -548,11 +627,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> AsyncIterator[BaseMessageChunk]:
+    ) -> AsyncIterator[AIMessageChunk]:
         if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):
             # No async or sync stream is implemented, so fall back to ainvoke
             yield cast(
-                "BaseMessageChunk",
+                "AIMessageChunk",
                 await self.ainvoke(input, config=config, stop=stop, **kwargs),
             )
             return
@@ -599,7 +678,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
             try:
                 input_messages = _normalize_messages(messages)
-                run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
+                run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
+                yielded = False
                 async for chunk in self._astream(
                     input_messages,
                     stop=stop,
@@ -608,11 +688,34 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                     if chunk.message.id is None:
                         chunk.message.id = run_id
                     chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                    if self.output_version == "v1":
+                        # Overwrite .content with .content_blocks
+                        chunk.message = _update_message_content_to_blocks(
+                            chunk.message, "v1"
+                        )
                     await run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                     chunks.append(chunk)
-                    yield chunk.message
+                    yield cast("AIMessageChunk", chunk.message)
+                    yielded = True
+
+                # Yield a final empty chunk with chunk_position="last" if not yet yielded
+                if (
+                    yielded
+                    and isinstance(chunk.message, AIMessageChunk)
+                    and not chunk.message.chunk_position
+                ):
+                    empty_content: Union[str, list] = (
+                        "" if isinstance(chunk.message.content, str) else []
+                    )
+                    msg_chunk = AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                    await run_manager.on_llm_new_token(
+                        "", chunk=ChatGenerationChunk(message=msg_chunk)
+                    )
+                    yield msg_chunk
             except BaseException as e:
                 generations_with_error_metadata = _generate_response_from_error(e)
                 chat_generation_chunk = merge_chat_generation_chunks(chunks)
@@ -622,7 +725,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                     generations = [generations_with_error_metadata]
                 await run_manager.on_llm_error(
                     e,
-                    response=LLMResult(generations=generations),  # type: ignore[arg-type]
+                    response=LLMResult(generations=generations),
                 )
                 raise
 
@@ -653,6 +756,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
         Returns:
             List of ChatGeneration objects.
+
         """
         converted_generations = []
         for gen in cache_val:
@@ -666,6 +770,16 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 converted_generations.append(chat_gen)
             else:
                 # Already a ChatGeneration or other expected type
+                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
+                    # We zero out cost on cache hits
+                    gen.message = gen.message.model_copy(
+                        update={
+                            "usage_metadata": {
+                                **(gen.message.usage_metadata or {}),
+                                "total_cost": 0,
+                            }
+                        }
+                    )
                 converted_generations.append(gen)
         return converted_generations
 
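The cache-hit transformation in isolation (a sketch; model_copy skips validation, which is why the extra total_cost key can be written onto usage_metadata):

    from langchain_core.messages import AIMessage

    cached = AIMessage(
        content="hi",
        usage_metadata={"input_tokens": 3, "output_tokens": 1, "total_tokens": 4},
    )
    zeroed = cached.model_copy(
        update={"usage_metadata": {**(cached.usage_metadata or {}), "total_cost": 0}}
    )
    assert zeroed.usage_metadata["total_cost"] == 0  # cost not re-charged on a hit
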
@@ -768,7 +882,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
         Returns:
             An LLMResult, which contains a list of candidate Generations for each input
-                prompt and additional model provider-specific output.
+            prompt and additional model provider-specific output.
+
         """
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
@@ -825,17 +940,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 run_managers[i].on_llm_error(
                     e,
                     response=LLMResult(
-                        generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                        generations=[generations_with_error_metadata]
                     ),
                 )
                 raise
         flattened_outputs = [
-            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[list-item]
+            LLMResult(generations=[res.generations], llm_output=res.llm_output)
             for res in results
         ]
         llm_output = self._combine_llm_outputs([res.llm_output for res in results])
         generations = [res.generations for res in results]
-        output = LLMResult(generations=generations, llm_output=llm_output)  # type: ignore[arg-type]
+        output = LLMResult(generations=generations, llm_output=llm_output)
         if run_managers:
             run_infos = []
             for manager, flattened_output in zip(run_managers, flattened_outputs):
@@ -882,7 +997,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
         Returns:
             An LLMResult, which contains a list of candidate Generations for each input
-                prompt and additional model provider-specific output.
+            prompt and additional model provider-specific output.
+
         """
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
@@ -944,7 +1060,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 await run_managers[i].on_llm_error(
                     res,
                     response=LLMResult(
-                        generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                        generations=[generations_with_error_metadata]
                     ),
                 )
                 exceptions.append(res)
@@ -954,7 +1070,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 *[
                     run_manager.on_llm_end(
                         LLMResult(
-                            generations=[res.generations],  # type: ignore[list-item, union-attr]
+                            generations=[res.generations],  # type: ignore[union-attr]
                             llm_output=res.llm_output,  # type: ignore[union-attr]
                         )
                     )
@@ -964,12 +1080,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             )
             raise exceptions[0]
         flattened_outputs = [
-            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[list-item, union-attr]
+            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[union-attr]
             for res in results
         ]
         llm_output = self._combine_llm_outputs([res.llm_output for res in results])  # type: ignore[union-attr]
         generations = [res.generations for res in results]  # type: ignore[union-attr]
-        output = LLMResult(generations=generations, llm_output=llm_output)  # type: ignore[arg-type]
+        output = LLMResult(generations=generations, llm_output=llm_output)
         await asyncio.gather(
             *[
                 run_manager.on_llm_end(flattened_output)
@@ -1048,15 +1164,43 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             **kwargs,
         ):
             chunks: list[ChatGenerationChunk] = []
+            run_id: Optional[str] = (
+                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+            )
+            yielded = False
             for chunk in self._stream(messages, stop=stop, **kwargs):
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 if run_manager:
                     if chunk.message.id is None:
-                        chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+                        chunk.message.id = run_id
                     run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                 chunks.append(chunk)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                )
+                if run_manager:
+                    run_manager.on_llm_new_token("", chunk=chunk)
+                chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         elif inspect.signature(self._generate).parameters.get("run_manager"):
             result = self._generate(
@@ -1065,10 +1209,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         else:
             result = self._generate(messages, stop=stop, **kwargs)
 
+        if self.output_version == "v1":
+            # Overwrite .content with .content_blocks
+            for generation in result.generations:
+                generation.message = _update_message_content_to_blocks(
+                    generation.message, "v1"
+                )
+
         # Add response metadata to each generation
         for idx, generation in enumerate(result.generations):
             if run_manager and generation.message.id is None:
-                generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
+                generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
             generation.message.response_metadata = _gen_info_and_msg_metadata(
                 generation
             )
@@ -1121,15 +1272,43 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             **kwargs,
         ):
             chunks: list[ChatGenerationChunk] = []
+            run_id: Optional[str] = (
+                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+            )
+            yielded = False
             async for chunk in self._astream(messages, stop=stop, **kwargs):
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 if run_manager:
                     if chunk.message.id is None:
-                        chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+                        chunk.message.id = run_id
                     await run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                 chunks.append(chunk)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                )
+                if run_manager:
+                    await run_manager.on_llm_new_token("", chunk=chunk)
+                chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         elif inspect.signature(self._agenerate).parameters.get("run_manager"):
             result = await self._agenerate(
@@ -1138,10 +1317,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         else:
             result = await self._agenerate(messages, stop=stop, **kwargs)
 
+        if self.output_version == "v1":
+            # Overwrite .content with .content_blocks
+            for generation in result.generations:
+                generation.message = _update_message_content_to_blocks(
+                    generation.message, "v1"
+                )
+
         # Add response metadata to each generation
         for idx, generation in enumerate(result.generations):
             if run_manager and generation.message.id is None:
-                generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
+                generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
             generation.message.response_metadata = _gen_info_and_msg_metadata(
                 generation
             )
@@ -1238,6 +1424,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
         Returns:
             The model output message.
+
         """
         generation = self.generate(
             [messages], stop=stop, callbacks=callbacks, **kwargs
@@ -1278,6 +1465,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
         Returns:
             The model output string.
+
         """
         return self.predict(message, stop=stop, **kwargs)
 
@@ -1297,6 +1485,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
         Returns:
             The predicted output string.
+
         """
         stop_ = None if stop is None else list(stop)
         result = self([HumanMessage(content=text)], stop=stop_, **kwargs)
@@ -1363,7 +1552,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         tool_choice: Optional[Union[str]] = None,
         **kwargs: Any,
-    ) -> Runnable[LanguageModelInput, BaseMessage]:
+    ) -> Runnable[LanguageModelInput, AIMessage]:
         """Bind tools to the model.
 
         Args:
@@ -1372,6 +1561,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 
         Returns:
             A Runnable that returns a message.
+
         """
         raise NotImplementedError
 
@@ -1534,8 +1724,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 class SimpleChatModel(BaseChatModel):
     """Simplified implementation for a chat model to inherit from.
 
-    **Note** This implementation is primarily here for backwards compatibility.
-    For new implementations, please use `BaseChatModel` directly.
+    .. note::
+        This implementation is primarily here for backwards compatibility. For new
+        implementations, please use ``BaseChatModel`` directly.
+
     """
 
     def _generate(