langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (74)
  1. langchain_core/_api/beta_decorator.py +2 -2
  2. langchain_core/_api/deprecation.py +1 -1
  3. langchain_core/beta/runnables/context.py +1 -1
  4. langchain_core/callbacks/base.py +14 -23
  5. langchain_core/callbacks/file.py +13 -2
  6. langchain_core/callbacks/manager.py +74 -157
  7. langchain_core/callbacks/streaming_stdout.py +3 -4
  8. langchain_core/callbacks/usage.py +2 -12
  9. langchain_core/chat_history.py +6 -6
  10. langchain_core/documents/base.py +1 -1
  11. langchain_core/documents/compressor.py +9 -6
  12. langchain_core/indexing/base.py +2 -2
  13. langchain_core/language_models/_utils.py +232 -101
  14. langchain_core/language_models/base.py +35 -23
  15. langchain_core/language_models/chat_models.py +248 -54
  16. langchain_core/language_models/fake_chat_models.py +28 -81
  17. langchain_core/load/dump.py +3 -4
  18. langchain_core/messages/__init__.py +30 -24
  19. langchain_core/messages/ai.py +188 -30
  20. langchain_core/messages/base.py +164 -25
  21. langchain_core/messages/block_translators/__init__.py +89 -0
  22. langchain_core/messages/block_translators/anthropic.py +451 -0
  23. langchain_core/messages/block_translators/bedrock.py +45 -0
  24. langchain_core/messages/block_translators/bedrock_converse.py +47 -0
  25. langchain_core/messages/block_translators/google_genai.py +45 -0
  26. langchain_core/messages/block_translators/google_vertexai.py +47 -0
  27. langchain_core/messages/block_translators/groq.py +45 -0
  28. langchain_core/messages/block_translators/langchain_v0.py +164 -0
  29. langchain_core/messages/block_translators/ollama.py +45 -0
  30. langchain_core/messages/block_translators/openai.py +798 -0
  31. langchain_core/messages/{content_blocks.py → content.py} +303 -278
  32. langchain_core/messages/human.py +29 -9
  33. langchain_core/messages/system.py +29 -9
  34. langchain_core/messages/tool.py +94 -13
  35. langchain_core/messages/utils.py +34 -234
  36. langchain_core/output_parsers/base.py +14 -50
  37. langchain_core/output_parsers/json.py +2 -5
  38. langchain_core/output_parsers/list.py +2 -7
  39. langchain_core/output_parsers/openai_functions.py +5 -28
  40. langchain_core/output_parsers/openai_tools.py +49 -90
  41. langchain_core/output_parsers/pydantic.py +2 -3
  42. langchain_core/output_parsers/transform.py +12 -53
  43. langchain_core/output_parsers/xml.py +9 -17
  44. langchain_core/prompt_values.py +8 -112
  45. langchain_core/prompts/chat.py +1 -3
  46. langchain_core/runnables/base.py +500 -451
  47. langchain_core/runnables/branch.py +1 -1
  48. langchain_core/runnables/fallbacks.py +4 -4
  49. langchain_core/runnables/history.py +1 -1
  50. langchain_core/runnables/passthrough.py +3 -3
  51. langchain_core/runnables/retry.py +1 -1
  52. langchain_core/runnables/router.py +1 -1
  53. langchain_core/structured_query.py +3 -7
  54. langchain_core/tools/base.py +14 -41
  55. langchain_core/tools/convert.py +2 -22
  56. langchain_core/tools/retriever.py +1 -8
  57. langchain_core/tools/structured.py +2 -10
  58. langchain_core/tracers/_streaming.py +6 -7
  59. langchain_core/tracers/base.py +7 -14
  60. langchain_core/tracers/core.py +4 -27
  61. langchain_core/tracers/event_stream.py +4 -15
  62. langchain_core/tracers/langchain.py +3 -14
  63. langchain_core/tracers/log_stream.py +2 -3
  64. langchain_core/utils/_merge.py +45 -7
  65. langchain_core/utils/function_calling.py +22 -9
  66. langchain_core/utils/utils.py +29 -0
  67. langchain_core/version.py +1 -1
  68. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/METADATA +7 -9
  69. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/RECORD +71 -64
  70. langchain_core/v1/__init__.py +0 -1
  71. langchain_core/v1/chat_models.py +0 -1047
  72. langchain_core/v1/messages.py +0 -755
  73. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/WHEEL +0 -0
  74. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/entry_points.txt +0 -0
langchain_core/language_models/chat_models.py
@@ -27,7 +27,10 @@ from langchain_core.callbacks import (
     Callbacks,
 )
 from langchain_core.globals import get_llm_cache
-from langchain_core.language_models._utils import _normalize_messages
+from langchain_core.language_models._utils import (
+    _normalize_messages,
+    _update_message_content_to_blocks,
+)
 from langchain_core.language_models.base import (
     BaseLanguageModel,
     LangSmithParams,
@@ -36,16 +39,18 @@ from langchain_core.language_models.base import (
 from langchain_core.load import dumpd, dumps
 from langchain_core.messages import (
     AIMessage,
+    AIMessageChunk,
     AnyMessage,
     BaseMessage,
-    BaseMessageChunk,
     HumanMessage,
     convert_to_messages,
-    convert_to_openai_image_block,
     is_data_content_block,
     message_chunk_to_message,
 )
-from langchain_core.messages.ai import _LC_ID_PREFIX
+from langchain_core.messages.block_translators.openai import (
+    convert_to_openai_data_block,
+    convert_to_openai_image_block,
+)
 from langchain_core.outputs import (
     ChatGeneration,
     ChatGenerationChunk,
@@ -65,6 +70,7 @@ from langchain_core.utils.function_calling import (
     convert_to_openai_tool,
 )
 from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
+from langchain_core.utils.utils import LC_ID_PREFIX, from_env

 if TYPE_CHECKING:
     import uuid
@@ -78,6 +84,11 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
     if hasattr(error, "response"):
         response = error.response
         metadata: dict = {}
+        if hasattr(response, "json"):
+            try:
+                metadata["body"] = response.json()
+            except Exception:
+                metadata["body"] = getattr(response, "text", None)
         if hasattr(response, "headers"):
             try:
                 metadata["headers"] = dict(response.headers)
@@ -97,17 +108,18 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:


 def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
-    """Format messages for tracing in on_chat_model_start.
+    """Format messages for tracing in ``on_chat_model_start``.

     - Update image content blocks to OpenAI Chat Completions format (backward
       compatibility).
-    - Add "type" key to content blocks that have a single key.
+    - Add ``type`` key to content blocks that have a single key.

     Args:
         messages: List of messages to format.

     Returns:
         List of messages formatted for tracing.
+
     """
     messages_to_trace = []
     for message in messages:
@@ -119,7 +131,7 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                 if (
                     block.get("type") == "image"
                     and is_data_content_block(block)
-                    and block.get("source_type") != "id"
+                    and not ("file_id" in block or block.get("source_type") == "id")
                 ):
                     if message_to_trace is message:
                         # Shallow copy
@@ -129,6 +141,19 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                     message_to_trace.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
                         convert_to_openai_image_block(block)
                     )
+                elif (
+                    block.get("type") == "file"
+                    and is_data_content_block(block)
+                    and "base64" in block
+                ):
+                    if message_to_trace is message:
+                        # Shallow copy
+                        message_to_trace = message.model_copy()
+                        message_to_trace.content = list(message_to_trace.content)
+
+                    message_to_trace.content[idx] = convert_to_openai_data_block(  # type: ignore[index]
+                        block
+                    )
                 elif len(block) == 1 and "type" not in block:
                     # Tracing assumes all content blocks have a "type" key. Here
                     # we add this key if it is missing, and there's an obvious
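
For reference, the new ``elif`` branch targets v1-style file blocks that carry inline base64 data. A small sketch of such a block (the payload is a stand-in):

    from langchain_core.messages import HumanMessage, is_data_content_block

    # A v1-style file content block: raw data under a "base64" key.
    file_block = {
        "type": "file",
        "base64": "aGVsbG8gd29ybGQ=",  # stand-in payload
        "mime_type": "text/plain",
    }
    assert is_data_content_block(file_block)

    # When a message containing this block is traced, the branch above
    # rewrites it with convert_to_openai_data_block into OpenAI Chat
    # Completions format before on_chat_model_start fires.
    msg = HumanMessage(content=[file_block])
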
@@ -153,10 +178,11 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
     """Generate from a stream.

     Args:
-        stream: Iterator of ChatGenerationChunk.
+        stream: Iterator of ``ChatGenerationChunk``.

     Returns:
         ChatResult: Chat result.
+
     """
     generation = next(stream, None)
     if generation:
@@ -180,10 +206,11 @@ async def agenerate_from_stream(
     """Async generate from a stream.

     Args:
-        stream: Iterator of ChatGenerationChunk.
+        stream: Iterator of ``ChatGenerationChunk``.

     Returns:
         ChatResult: Chat result.
+
     """
     chunks = [chunk async for chunk in stream]
     return await run_in_executor(None, generate_from_stream, iter(chunks))
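
Both helpers collapse a stream of chunks into a single ``ChatResult``. A quick sketch of direct use with synthetic chunks:

    from langchain_core.language_models.chat_models import generate_from_stream
    from langchain_core.messages import AIMessageChunk
    from langchain_core.outputs import ChatGenerationChunk

    chunks = [
        ChatGenerationChunk(message=AIMessageChunk(content="Hello, ")),
        ChatGenerationChunk(message=AIMessageChunk(content="world!")),
    ]
    result = generate_from_stream(iter(chunks))
    print(result.generations[0].message.content)  # "Hello, world!"
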
@@ -208,7 +235,7 @@ def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) ->
     return ls_structured_output_format_dict


-class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
+class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
     """Base class for chat models.

     Key imperative methods:
@@ -311,15 +338,38 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
       provided. This offers the best of both worlds.
     - If False (default), will always use streaming case if available.

-    The main reason for this flag is that code might be written using ``.stream()`` and
+    The main reason for this flag is that code might be written using ``stream()`` and
     a user may want to swap out a given model for another model whose the implementation
     does not properly support streaming.
+
+    """
+
+    output_version: Optional[str] = Field(
+        default_factory=from_env("LC_OUTPUT_VERSION", default=None)
+    )
+    """Version of ``AIMessage`` output format to store in message content.
+
+    ``AIMessage.content_blocks`` will lazily parse the contents of ``content`` into a
+    standard format. This flag can be used to additionally store the standard format
+    in message content, e.g., for serialization purposes.
+
+    Supported values:
+
+    - ``"v0"``: provider-specific format in content (can lazily-parse with
+      ``.content_blocks``)
+    - ``"v1"``: standardized format in content (consistent with ``.content_blocks``)
+
+    Partner packages (e.g., ``langchain-openai``) can also use this field to roll out
+    new content formats in a backward-compatible way.
+
+    .. versionadded:: 1.0
+
     """

     @model_validator(mode="before")
     @classmethod
     def raise_deprecation(cls, values: dict) -> Any:
-        """Raise deprecation warning if callback_manager is used.
+        """Raise deprecation warning if ``callback_manager`` is used.

         Args:
             values (Dict): Values to validate.
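
The field can be set per model instance as well as through the ``LC_OUTPUT_VERSION`` environment variable picked up by ``from_env``. A minimal sketch of opting in, assuming a provider package that exposes the field (model name illustrative):

    from langchain_openai import ChatOpenAI  # any BaseChatModel subclass

    # Equivalent to exporting LC_OUTPUT_VERSION=v1 before startup.
    model = ChatOpenAI(model="gpt-4.1-mini", output_version="v1")

    msg = model.invoke("Hello!")
    # With "v1", content is stored in the standardized block format, so it
    # should be consistent with the lazily parsed content_blocks view.
    assert msg.content == msg.content_blocks
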
@@ -328,7 +378,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             Dict: Validated values.

         Raises:
-            DeprecationWarning: If callback_manager is used.
+            DeprecationWarning: If ``callback_manager`` is used.
+
         """
         if values.get("callback_manager") is not None:
             warnings.warn(
@@ -376,21 +427,24 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> BaseMessage:
+    ) -> AIMessage:
         config = ensure_config(config)
         return cast(
-            "ChatGeneration",
-            self.generate_prompt(
-                [self._convert_input(input)],
-                stop=stop,
-                callbacks=config.get("callbacks"),
-                tags=config.get("tags"),
-                metadata=config.get("metadata"),
-                run_name=config.get("run_name"),
-                run_id=config.pop("run_id", None),
-                **kwargs,
-            ).generations[0][0],
-        ).message
+            "AIMessage",
+            cast(
+                "ChatGeneration",
+                self.generate_prompt(
+                    [self._convert_input(input)],
+                    stop=stop,
+                    callbacks=config.get("callbacks"),
+                    tags=config.get("tags"),
+                    metadata=config.get("metadata"),
+                    run_name=config.get("run_name"),
+                    run_id=config.pop("run_id", None),
+                    **kwargs,
+                ).generations[0][0],
+            ).message,
+        )

     @override
     async def ainvoke(
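
Because ``BaseChatModel`` is now generic over ``AIMessage``, ``invoke`` and ``ainvoke`` are typed to return ``AIMessage`` rather than ``BaseMessage``. A sketch of what that means for callers (helper function hypothetical):

    from langchain_core.language_models.chat_models import BaseChatModel
    from langchain_core.messages import AIMessage

    def total_tokens(model: BaseChatModel, prompt: str) -> int:
        """Hypothetical helper: read token usage without a cast."""
        msg: AIMessage = model.invoke(prompt)  # typed BaseMessage before 1.0
        # usage_metadata and tool_calls exist only on AIMessage, so this
        # previously needed an isinstance check or cast to type-check.
        return (msg.usage_metadata or {}).get("total_tokens", 0)
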
@@ -400,7 +454,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> BaseMessage:
+    ) -> AIMessage:
         config = ensure_config(config)
         llm_result = await self.agenerate_prompt(
             [self._convert_input(input)],
@@ -412,7 +466,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             run_id=config.pop("run_id", None),
             **kwargs,
         )
-        return cast("ChatGeneration", llm_result.generations[0][0]).message
+        return cast(
+            "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message
+        )

     def _should_stream(
         self,
@@ -457,11 +513,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> Iterator[BaseMessageChunk]:
+    ) -> Iterator[AIMessageChunk]:
         if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):
             # model doesn't implement streaming, so use default implementation
             yield cast(
-                "BaseMessageChunk",
+                "AIMessageChunk",
                 self.invoke(input, config=config, stop=stop, **kwargs),
             )
         else:
@@ -506,16 +562,41 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         try:
             input_messages = _normalize_messages(messages)
-            run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
+            run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
+            yielded = False
             for chunk in self._stream(input_messages, stop=stop, **kwargs):
                 if chunk.message.id is None:
                     chunk.message.id = run_id
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 run_manager.on_llm_new_token(
                     cast("str", chunk.message.content), chunk=chunk
                 )
                 chunks.append(chunk)
-                yield chunk.message
+                yield cast("AIMessageChunk", chunk.message)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet
+            # yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                msg_chunk = AIMessageChunk(
+                    content=empty_content, chunk_position="last", id=run_id
+                )
+                run_manager.on_llm_new_token(
+                    "", chunk=ChatGenerationChunk(message=msg_chunk)
+                )
+                yield msg_chunk
         except BaseException as e:
             generations_with_error_metadata = _generate_response_from_error(e)
             chat_generation_chunk = merge_chat_generation_chunks(chunks)
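
Streams now terminate with a chunk whose ``chunk_position`` is ``"last"``; if the provider's final chunk did not set it, an empty synthetic chunk is appended. A sketch of a consumer relying on the sentinel (``model`` is an assumed chat model instance):

    from typing import Optional

    from langchain_core.messages import AIMessageChunk

    final: Optional[AIMessageChunk] = None
    for chunk in model.stream("Write a haiku"):
        final = chunk if final is None else final + chunk  # chunks aggregate with +
        if chunk.chunk_position == "last":
            # Guaranteed terminal chunk, per the logic added above.
            print("stream complete; run id:", final.id)
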
@@ -528,7 +609,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 generations = [generations_with_error_metadata]
             run_manager.on_llm_error(
                 e,
-                response=LLMResult(generations=generations),  # type: ignore[arg-type]
+                response=LLMResult(generations=generations),
             )
             raise

@@ -548,11 +629,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> AsyncIterator[BaseMessageChunk]:
+    ) -> AsyncIterator[AIMessageChunk]:
         if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):
             # No async or sync stream is implemented, so fall back to ainvoke
             yield cast(
-                "BaseMessageChunk",
+                "AIMessageChunk",
                 await self.ainvoke(input, config=config, stop=stop, **kwargs),
             )
             return
@@ -599,7 +680,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         try:
             input_messages = _normalize_messages(messages)
-            run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
+            run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
+            yielded = False
             async for chunk in self._astream(
                 input_messages,
                 stop=stop,
@@ -608,11 +690,34 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 if chunk.message.id is None:
                     chunk.message.id = run_id
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 await run_manager.on_llm_new_token(
                     cast("str", chunk.message.content), chunk=chunk
                 )
                 chunks.append(chunk)
-                yield chunk.message
+                yield cast("AIMessageChunk", chunk.message)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                msg_chunk = AIMessageChunk(
+                    content=empty_content, chunk_position="last", id=run_id
+                )
+                await run_manager.on_llm_new_token(
+                    "", chunk=ChatGenerationChunk(message=msg_chunk)
+                )
+                yield msg_chunk
         except BaseException as e:
             generations_with_error_metadata = _generate_response_from_error(e)
             chat_generation_chunk = merge_chat_generation_chunks(chunks)
@@ -653,6 +758,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             List of ChatGeneration objects.
+
         """
         converted_generations = []
         for gen in cache_val:
@@ -666,6 +772,16 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 converted_generations.append(chat_gen)
             else:
                 # Already a ChatGeneration or other expected type
+                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
+                    # We zero out cost on cache hits
+                    gen.message = gen.message.model_copy(
+                        update={
+                            "usage_metadata": {
+                                **(gen.message.usage_metadata or {}),
+                                "total_cost": 0,
+                            }
+                        }
+                    )
                 converted_generations.append(gen)
         return converted_generations

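Because a cache hit incurs no new provider charge, cached generations now come back with ``total_cost`` forced to ``0`` in ``usage_metadata``. A sketch of the observable behavior, assuming ``model`` is a chat model with caching enabled:

    from langchain_core.caches import InMemoryCache
    from langchain_core.globals import set_llm_cache

    set_llm_cache(InMemoryCache())

    first = model.invoke("What is 2 + 2?")   # real call; provider-reported usage
    second = model.invoke("What is 2 + 2?")  # same prompt: served from cache
    # Token counts are preserved, but cost is zeroed by the
    # model_copy(update=...) shown above.
    assert (second.usage_metadata or {}).get("total_cost") == 0
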
@@ -768,7 +884,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             An LLMResult, which contains a list of candidate Generations for each input
-            prompt and additional model provider-specific output.
+            prompt and additional model provider-specific output.
+
         """
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
@@ -825,17 +942,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                     run_managers[i].on_llm_error(
                         e,
                         response=LLMResult(
-                            generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                            generations=[generations_with_error_metadata]
                         ),
                     )
                 raise
         flattened_outputs = [
-            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[list-item]
+            LLMResult(generations=[res.generations], llm_output=res.llm_output)
             for res in results
         ]
         llm_output = self._combine_llm_outputs([res.llm_output for res in results])
         generations = [res.generations for res in results]
-        output = LLMResult(generations=generations, llm_output=llm_output)  # type: ignore[arg-type]
+        output = LLMResult(generations=generations, llm_output=llm_output)
         if run_managers:
             run_infos = []
             for manager, flattened_output in zip(run_managers, flattened_outputs):
@@ -882,7 +999,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             An LLMResult, which contains a list of candidate Generations for each input
-            prompt and additional model provider-specific output.
+            prompt and additional model provider-specific output.
+
         """
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
@@ -944,7 +1062,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 await run_managers[i].on_llm_error(
                     res,
                     response=LLMResult(
-                        generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                        generations=[generations_with_error_metadata]
                     ),
                 )
                 exceptions.append(res)
@@ -954,7 +1072,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             *[
                 run_manager.on_llm_end(
                     LLMResult(
-                        generations=[res.generations],  # type: ignore[list-item, union-attr]
+                        generations=[res.generations],  # type: ignore[union-attr]
                         llm_output=res.llm_output,  # type: ignore[union-attr]
                     )
                 )
@@ -964,12 +1082,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             )
             raise exceptions[0]
         flattened_outputs = [
-            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[list-item, union-attr]
+            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[union-attr]
             for res in results
         ]
         llm_output = self._combine_llm_outputs([res.llm_output for res in results])  # type: ignore[union-attr]
         generations = [res.generations for res in results]  # type: ignore[union-attr]
-        output = LLMResult(generations=generations, llm_output=llm_output)  # type: ignore[arg-type]
+        output = LLMResult(generations=generations, llm_output=llm_output)
         await asyncio.gather(
             *[
                 run_manager.on_llm_end(flattened_output)
@@ -1048,15 +1166,43 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             **kwargs,
         ):
             chunks: list[ChatGenerationChunk] = []
+            run_id: Optional[str] = (
+                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+            )
+            yielded = False
             for chunk in self._stream(messages, stop=stop, **kwargs):
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 if run_manager:
                     if chunk.message.id is None:
-                        chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+                        chunk.message.id = run_id
                     run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                 chunks.append(chunk)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                )
+                if run_manager:
+                    run_manager.on_llm_new_token("", chunk=chunk)
+                chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         elif inspect.signature(self._generate).parameters.get("run_manager"):
             result = self._generate(
@@ -1065,10 +1211,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         else:
             result = self._generate(messages, stop=stop, **kwargs)

+        if self.output_version == "v1":
+            # Overwrite .content with .content_blocks
+            for generation in result.generations:
+                generation.message = _update_message_content_to_blocks(
+                    generation.message, "v1"
+                )
+
         # Add response metadata to each generation
         for idx, generation in enumerate(result.generations):
             if run_manager and generation.message.id is None:
-                generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
+                generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
             generation.message.response_metadata = _gen_info_and_msg_metadata(
                 generation
             )
@@ -1121,15 +1274,43 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             **kwargs,
         ):
             chunks: list[ChatGenerationChunk] = []
+            run_id: Optional[str] = (
+                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+            )
+            yielded = False
             async for chunk in self._astream(messages, stop=stop, **kwargs):
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 if run_manager:
                     if chunk.message.id is None:
-                        chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+                        chunk.message.id = run_id
                     await run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                 chunks.append(chunk)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                )
+                if run_manager:
+                    await run_manager.on_llm_new_token("", chunk=chunk)
+                chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         elif inspect.signature(self._agenerate).parameters.get("run_manager"):
             result = await self._agenerate(
1135
1316
  result = await self._agenerate(
@@ -1138,10 +1319,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1138
1319
  else:
1139
1320
  result = await self._agenerate(messages, stop=stop, **kwargs)
1140
1321
 
1322
+ if self.output_version == "v1":
1323
+ # Overwrite .content with .content_blocks
1324
+ for generation in result.generations:
1325
+ generation.message = _update_message_content_to_blocks(
1326
+ generation.message, "v1"
1327
+ )
1328
+
1141
1329
  # Add response metadata to each generation
1142
1330
  for idx, generation in enumerate(result.generations):
1143
1331
  if run_manager and generation.message.id is None:
1144
- generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1332
+ generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1145
1333
  generation.message.response_metadata = _gen_info_and_msg_metadata(
1146
1334
  generation
1147
1335
  )
@@ -1238,6 +1426,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             The model output message.
+
         """
         generation = self.generate(
             [messages], stop=stop, callbacks=callbacks, **kwargs
@@ -1278,6 +1467,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             The model output string.
+
         """
         return self.predict(message, stop=stop, **kwargs)

@@ -1297,6 +1487,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             The predicted output string.
+
         """
         stop_ = None if stop is None else list(stop)
         result = self([HumanMessage(content=text)], stop=stop_, **kwargs)
@@ -1363,7 +1554,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         tool_choice: Optional[Union[str]] = None,
         **kwargs: Any,
-    ) -> Runnable[LanguageModelInput, BaseMessage]:
+    ) -> Runnable[LanguageModelInput, AIMessage]:
         """Bind tools to the model.

         Args:
1372
1563
 
1373
1564
  Returns:
1374
1565
  A Runnable that returns a message.
1566
+
1375
1567
  """
1376
1568
  raise NotImplementedError
1377
1569
 
@@ -1534,8 +1726,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 class SimpleChatModel(BaseChatModel):
     """Simplified implementation for a chat model to inherit from.

-    **Note** This implementation is primarily here for backwards compatibility.
-    For new implementations, please use `BaseChatModel` directly.
+    .. note::
+        This implementation is primarily here for backwards compatibility. For new
+        implementations, please use ``BaseChatModel`` directly.
+
     """

     def _generate(