langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. langchain_core/_api/beta_decorator.py +2 -2
  2. langchain_core/_api/deprecation.py +1 -1
  3. langchain_core/beta/runnables/context.py +1 -1
  4. langchain_core/callbacks/base.py +14 -23
  5. langchain_core/callbacks/file.py +13 -2
  6. langchain_core/callbacks/manager.py +74 -157
  7. langchain_core/callbacks/streaming_stdout.py +3 -4
  8. langchain_core/callbacks/usage.py +2 -12
  9. langchain_core/chat_history.py +6 -6
  10. langchain_core/documents/base.py +1 -1
  11. langchain_core/documents/compressor.py +9 -6
  12. langchain_core/indexing/base.py +2 -2
  13. langchain_core/language_models/_utils.py +232 -101
  14. langchain_core/language_models/base.py +35 -23
  15. langchain_core/language_models/chat_models.py +248 -54
  16. langchain_core/language_models/fake_chat_models.py +28 -81
  17. langchain_core/load/dump.py +3 -4
  18. langchain_core/messages/__init__.py +30 -24
  19. langchain_core/messages/ai.py +188 -30
  20. langchain_core/messages/base.py +164 -25
  21. langchain_core/messages/block_translators/__init__.py +89 -0
  22. langchain_core/messages/block_translators/anthropic.py +451 -0
  23. langchain_core/messages/block_translators/bedrock.py +45 -0
  24. langchain_core/messages/block_translators/bedrock_converse.py +47 -0
  25. langchain_core/messages/block_translators/google_genai.py +45 -0
  26. langchain_core/messages/block_translators/google_vertexai.py +47 -0
  27. langchain_core/messages/block_translators/groq.py +45 -0
  28. langchain_core/messages/block_translators/langchain_v0.py +164 -0
  29. langchain_core/messages/block_translators/ollama.py +45 -0
  30. langchain_core/messages/block_translators/openai.py +798 -0
  31. langchain_core/messages/{content_blocks.py → content.py} +303 -278
  32. langchain_core/messages/human.py +29 -9
  33. langchain_core/messages/system.py +29 -9
  34. langchain_core/messages/tool.py +94 -13
  35. langchain_core/messages/utils.py +34 -234
  36. langchain_core/output_parsers/base.py +14 -50
  37. langchain_core/output_parsers/json.py +2 -5
  38. langchain_core/output_parsers/list.py +2 -7
  39. langchain_core/output_parsers/openai_functions.py +5 -28
  40. langchain_core/output_parsers/openai_tools.py +49 -90
  41. langchain_core/output_parsers/pydantic.py +2 -3
  42. langchain_core/output_parsers/transform.py +12 -53
  43. langchain_core/output_parsers/xml.py +9 -17
  44. langchain_core/prompt_values.py +8 -112
  45. langchain_core/prompts/chat.py +1 -3
  46. langchain_core/runnables/base.py +500 -451
  47. langchain_core/runnables/branch.py +1 -1
  48. langchain_core/runnables/fallbacks.py +4 -4
  49. langchain_core/runnables/history.py +1 -1
  50. langchain_core/runnables/passthrough.py +3 -3
  51. langchain_core/runnables/retry.py +1 -1
  52. langchain_core/runnables/router.py +1 -1
  53. langchain_core/structured_query.py +3 -7
  54. langchain_core/tools/base.py +14 -41
  55. langchain_core/tools/convert.py +2 -22
  56. langchain_core/tools/retriever.py +1 -8
  57. langchain_core/tools/structured.py +2 -10
  58. langchain_core/tracers/_streaming.py +6 -7
  59. langchain_core/tracers/base.py +7 -14
  60. langchain_core/tracers/core.py +4 -27
  61. langchain_core/tracers/event_stream.py +4 -15
  62. langchain_core/tracers/langchain.py +3 -14
  63. langchain_core/tracers/log_stream.py +2 -3
  64. langchain_core/utils/_merge.py +45 -7
  65. langchain_core/utils/function_calling.py +22 -9
  66. langchain_core/utils/utils.py +29 -0
  67. langchain_core/version.py +1 -1
  68. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/METADATA +7 -9
  69. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/RECORD +71 -64
  70. langchain_core/v1/__init__.py +0 -1
  71. langchain_core/v1/chat_models.py +0 -1047
  72. langchain_core/v1/messages.py +0 -755
  73. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/WHEEL +0 -0
  74. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/entry_points.txt +0 -0
@@ -4,132 +4,143 @@
4
4
  This module is under active development. The API is unstable and subject to
5
5
  change in future releases.
6
6
 
7
- This module provides a standardized data structure for representing inputs to and
8
- outputs from Large Language Models. The core abstraction is the **Content Block**, a
9
- ``TypedDict`` that can represent a piece of text, an image, a tool call, or other
10
- structured data.
7
+ This module provides standardized data structures for representing inputs to and
8
+ outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict``.
9
+
10
+ **Rationale**
11
+
12
+ Different LLM providers use distinct and incompatible API schemas. This module
13
+ provides a unified, provider-agnostic format to facilitate these interactions. A
14
+ message to or from a model is simply a list of content blocks, allowing for the natural
15
+ interleaving of text, images, and other content in a single ordered sequence.
16
+
17
+ An adapter for a specific provider is responsible for translating this standard list of
18
+ blocks into the format required by its API.
19
+
20
+ **Extensibility**
11
21
 
12
22
  Data **not yet mapped** to a standard block may be represented using the
13
23
  ``NonStandardContentBlock``, which allows for provider-specific data to be included
14
24
  without losing the benefits of type checking and validation.
15
25
 
16
26
  Furthermore, provider-specific fields **within** a standard block are fully supported
17
- by default. However, since current type checkers do not recognize this, we are temporarily
18
- applying type ignore comments to suppress warnings. In the future,
19
- `PEP 728 <https://peps.python.org/pep-0728/>`__ will add an extra param, ``extra_items=Any``.
20
- When this is supported, we will apply it to block signatures to signify to type checkers
21
- that additional provider-specific fields are allowed.
22
-
23
- **Example with PEP 728 provider-specific fields:**
27
+ by default in the ``extras`` field of each block. This allows for additional metadata
28
+ to be included without breaking the standard structure.
24
29
 
25
- .. code-block:: python
30
+ .. warning::
31
+ Do not heavily rely on the ``extras`` field for provider-specific data! This field
32
+ is subject to deprecation in future releases as we move towards PEP 728.
26
33
 
27
- # Note `extra_items=Any`
28
- class TextContentBlock(TypedDict, extra_items=Any):
29
- type: Literal["text"]
30
- id: NotRequired[str]
31
- text: str
32
- annotations: NotRequired[list[Annotation]]
33
- index: NotRequired[int]
34
+ .. note::
35
+ Following widespread adoption of `PEP 728 <https://peps.python.org/pep-0728/>`__, we
36
+ will add ``extra_items=Any`` as a param to Content Blocks. This will signify to type
37
+ checkers that additional provider-specific fields are allowed outside of the
38
+ ``extras`` field, and that will become the new standard approach to adding
39
+ provider-specific metadata.
34
40
 
35
- .. code-block:: python
41
+ .. dropdown::
36
42
 
37
- from langchain_core.messages.content_blocks import TextContentBlock
43
+ **Example with PEP 728 provider-specific fields:**
38
44
 
39
- my_block: TextContentBlock = {
40
- # Add required fields
41
- "type": "text",
42
- "text": "Hello, world!",
43
- # Additional fields not specified in the TypedDict
44
- # These are valid with PEP 728 and are typed as Any
45
- "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
46
- "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
47
- "custom_field": "any value",
48
- }
45
+ .. code-block:: python
49
46
 
50
- openai_data = my_block["openai_metadata"] # Type: Any
47
+ # Content block definition
48
+ # NOTE: `extra_items=Any`
49
+ class TextContentBlock(TypedDict, extra_items=Any):
50
+ type: Literal["text"]
51
+ id: NotRequired[str]
52
+ text: str
53
+ annotations: NotRequired[list[Annotation]]
54
+ index: NotRequired[int]
51
55
 
52
- .. note::
53
- PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings
54
- from type checkers that don't yet support it. The functionality works correctly
55
- in Python 3.13+ and will be fully supported as the ecosystem catches up.
56
+ .. code-block:: python
56
57
 
57
- **Rationale**
58
+ from langchain_core.messages.content import TextContentBlock
59
+
60
+ # Create a text content block with provider-specific fields
61
+ my_block: TextContentBlock = {
62
+ # Add required fields
63
+ "type": "text",
64
+ "text": "Hello, world!",
65
+ # Additional fields not specified in the TypedDict
66
+ # These are valid with PEP 728 and are typed as Any
67
+ "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
68
+ "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
69
+ "custom_field": "any value",
70
+ }
58
71
 
59
- Different LLM providers use distinct and incompatible API schemas. This module
60
- introduces a unified, provider-agnostic format to standardize these interactions. A
61
- message to or from a model is simply a `list` of `ContentBlock` objects, allowing for
62
- the natural interleaving of text, images, and other content in a single, ordered
63
- sequence.
72
+ # Mutating an existing block to add provider-specific fields
73
+ openai_data = my_block["openai_metadata"] # Type: Any
64
74
 
65
- An adapter for a specific provider is responsible for translating this standard list of
66
- blocks into the format required by its API.
75
+ PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress
76
+ warnings from type checkers that don't yet support it. The functionality works
77
+ correctly in Python 3.13+ and will be fully supported as the ecosystem catches
78
+ up.
67
79
 
68
80
  **Key Block Types**
69
81
 
70
82
  The module defines several types of content blocks, including:
71
83
 
72
- - ``TextContentBlock``: Standard text.
73
- - ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data.
74
- - ``ToolCallContentBlock``, ``ToolOutputContentBlock``: For function calling.
84
+ - ``TextContentBlock``: Standard text output.
85
+ - ``Citation``: For annotations that link text output to a source document.
86
+ - ``ToolCall``: For function calling.
75
87
  - ``ReasoningContentBlock``: To capture a model's thought process.
76
- - ``Citation``: For annotations that link generated text to a source document.
88
+ - Multimodal data:
89
+ - ``ImageContentBlock``
90
+ - ``AudioContentBlock``
91
+ - ``VideoContentBlock``
92
+ - ``PlainTextContentBlock`` (e.g. .txt or .md files)
93
+ - ``FileContentBlock`` (e.g. PDFs, etc.)
77
94
 
78
95
  **Example Usage**
79
96
 
80
97
  .. code-block:: python
81
98
 
82
99
  # Direct construction:
83
- from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock
84
-
85
- multimodal_message: AIMessage = [
86
- TextContentBlock(type="text", text="What is shown in this image?"),
87
- ImageContentBlock(
88
- type="image",
89
- url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
90
- mime_type="image/png",
91
- ),
92
- ]
93
-
94
- from langchain_core.messages.content_blocks import create_text_block, create_image_block
95
-
96
- # Using factory functions:
97
- multimodal_message: AIMessage = [
98
- create_text_block("What is shown in this image?"),
99
- create_image_block(
100
- url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
101
- mime_type="image/png",
102
- ),
103
- ]
104
- """ # noqa: E501
105
-
106
- import warnings
107
- from typing import Any, Literal, Optional, Union
108
- from uuid import uuid4
100
+ from langchain_core.messages.content import TextContentBlock, ImageContentBlock
101
+
102
+ multimodal_message = AIMessage(content_blocks=
103
+ [
104
+ TextContentBlock(type="text", text="What is shown in this image?"),
105
+ ImageContentBlock(
106
+ type="image",
107
+ url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
108
+ mime_type="image/png",
109
+ ),
110
+ ]
111
+ )
109
112
 
110
- from typing_extensions import NotRequired, TypedDict, get_args, get_origin
113
+ # Using factories:
114
+ from langchain_core.messages.content import create_text_block, create_image_block
115
+
116
+ multimodal_message = AIMessage(content=
117
+ [
118
+ create_text_block("What is shown in this image?"),
119
+ create_image_block(
120
+ url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
121
+ mime_type="image/png",
122
+ ),
123
+ ]
124
+ )
111
125
 
126
+ Factory functions offer benefits such as:
127
+ - Automatic ID generation (when not provided)
128
+ - No need to manually specify the ``type`` field
112
129
 
113
- def _ensure_id(id_val: Optional[str]) -> str:
114
- """Ensure the ID is a valid string, generating a new UUID if not provided.
130
+ """
115
131
 
116
- Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
117
- LangChain-generated IDs.
132
+ from typing import Any, Literal, Optional, Union, get_args, get_type_hints
118
133
 
119
- Args:
120
- id_val: Optional string ID value to validate.
134
+ from typing_extensions import NotRequired, TypedDict
121
135
 
122
- Returns:
123
- A valid string ID, either the provided value or a new UUID.
124
- """
125
- return id_val or str(f"lc_{uuid4()}")
136
+ from langchain_core.utils.utils import ensure_id
126
137
 
127
138
 
128
139
  class Citation(TypedDict):
129
140
  """Annotation for citing data from a document.
130
141
 
131
142
  .. note::
132
- ``start/end`` indices refer to the **response text**,
143
+ ``start``/``end`` indices refer to the **response text**,
133
144
  not the source text. This means that the indices are relative to the model's
134
145
  response, not the original document (as specified in the ``url``).
135
146
 
@@ -150,18 +161,12 @@ class Citation(TypedDict):
150
161
 
151
162
  - Generated by the provider (e.g., OpenAI's file ID)
152
163
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
164
+
153
165
  """
154
166
 
155
167
  url: NotRequired[str]
156
168
  """URL of the document source."""
157
169
 
158
- # For future consideration, if needed:
159
- # provenance: NotRequired[str]
160
- # """Provenance of the document, e.g., "Wikipedia", "arXiv", etc.
161
-
162
- # Included for future compatibility; not currently implemented.
163
- # """
164
-
165
170
  title: NotRequired[str]
166
171
  """Source document title.
167
172
 
@@ -169,12 +174,10 @@ class Citation(TypedDict):
169
174
  """
170
175
 
171
176
  start_index: NotRequired[int]
172
- """Start index of the **response text** (``TextContentBlock.text``) for which the
173
- annotation applies."""
177
+ """Start index of the **response text** (``TextContentBlock.text``)."""
174
178
 
175
179
  end_index: NotRequired[int]
176
- """End index of the **response text** (``TextContentBlock.text``) for which the
177
- annotation applies."""
180
+ """End index of the **response text** (``TextContentBlock.text``)."""
178
181
 
179
182
  cited_text: NotRequired[str]
180
183
  """Excerpt of source text being cited."""
@@ -196,10 +199,12 @@ class NonStandardAnnotation(TypedDict):
196
199
  """Type of the content block. Used for discrimination."""
197
200
 
198
201
  id: NotRequired[str]
199
- """Content block identifier. Either:
202
+ """Content block identifier.
200
203
 
204
+ Either:
201
205
  - Generated by the provider (e.g., OpenAI's file ID)
202
206
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
207
+
203
208
  """
204
209
 
205
210
  value: dict[str, Any]
@@ -228,19 +233,21 @@ class TextContentBlock(TypedDict):
228
233
  """Type of the content block. Used for discrimination."""
229
234
 
230
235
  id: NotRequired[str]
231
- """Content block identifier. Either:
236
+ """Content block identifier.
232
237
 
238
+ Either:
233
239
  - Generated by the provider (e.g., OpenAI's file ID)
234
240
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
241
+
235
242
  """
236
243
 
237
244
  text: str
238
245
  """Block text."""
239
246
 
240
247
  annotations: NotRequired[list[Annotation]]
241
- """Citations and other annotations."""
248
+ """``Citation`` objects and other annotations."""
242
249
 
243
- index: NotRequired[int]
250
+ index: NotRequired[Union[int, str]]
244
251
  """Index of block in aggregate response. Used during streaming."""
245
252
 
246
253
  extras: NotRequired[dict[str, Any]]
@@ -280,6 +287,7 @@ class ToolCall(TypedDict):
280
287
 
281
288
  An identifier is needed to associate a tool call request with a tool
282
289
  call result in events when multiple concurrent tool calls are made.
290
+
283
291
  """
284
292
  # TODO: Consider making this NotRequired[str] in the future.
285
293
 
@@ -289,7 +297,7 @@ class ToolCall(TypedDict):
289
297
  args: dict[str, Any]
290
298
  """The arguments to the tool call."""
291
299
 
292
- index: NotRequired[int]
300
+ index: NotRequired[Union[int, str]]
293
301
  """Index of block in aggregate response. Used during streaming."""
294
302
 
295
303
  extras: NotRequired[dict[str, Any]]
@@ -299,7 +307,7 @@ class ToolCall(TypedDict):
299
307
  class ToolCallChunk(TypedDict):
300
308
  """A chunk of a tool call (e.g., as part of a stream).
301
309
 
302
- When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
310
+ When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``),
303
311
  all string attributes are concatenated. Chunks are only merged if their
304
312
  values of ``index`` are equal and not ``None``.
305
313
 
@@ -314,15 +322,21 @@ class ToolCallChunk(TypedDict):
314
322
  AIMessageChunk(content="", tool_call_chunks=left_chunks)
315
323
  + AIMessageChunk(content="", tool_call_chunks=right_chunks)
316
324
  ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)]
325
+
317
326
  """
318
327
 
319
328
  # TODO: Consider making fields NotRequired[str] in the future.
320
329
 
321
- type: NotRequired[Literal["tool_call_chunk"]]
330
+ type: Literal["tool_call_chunk"]
322
331
  """Used for serialization."""
323
332
 
324
333
  id: Optional[str]
325
- """An identifier associated with the tool call."""
334
+ """An identifier associated with the tool call.
335
+
336
+ An identifier is needed to associate a tool call request with a tool
337
+ call result in events when multiple concurrent tool calls are made.
338
+
339
+ """
326
340
 
327
341
  name: Optional[str]
328
342
  """The name of the tool to be called."""
@@ -330,7 +344,7 @@ class ToolCallChunk(TypedDict):
330
344
  args: Optional[str]
331
345
  """The arguments to the tool call."""
332
346
 
333
- index: Optional[int]
347
+ index: NotRequired[Union[int, str]]
334
348
  """The index of the tool call in a sequence."""
335
349
 
336
350
  extras: NotRequired[dict[str, Any]]
@@ -342,6 +356,7 @@ class InvalidToolCall(TypedDict):
342
356
 
343
357
  Here we add an ``error`` key to surface errors made during generation
344
358
  (e.g., invalid JSON arguments.)
359
+
345
360
  """
346
361
 
347
362
  # TODO: Consider making fields NotRequired[str] in the future.
@@ -350,7 +365,12 @@ class InvalidToolCall(TypedDict):
350
365
  """Used for discrimination."""
351
366
 
352
367
  id: Optional[str]
353
- """An identifier associated with the tool call."""
368
+ """An identifier associated with the tool call.
369
+
370
+ An identifier is needed to associate a tool call request with a tool
371
+ call result in events when multiple concurrent tool calls are made.
372
+
373
+ """
354
374
 
355
375
  name: Optional[str]
356
376
  """The name of the tool to be called."""
@@ -361,15 +381,13 @@ class InvalidToolCall(TypedDict):
361
381
  error: Optional[str]
362
382
  """An error message associated with the tool call."""
363
383
 
364
- index: NotRequired[int]
384
+ index: NotRequired[Union[int, str]]
365
385
  """Index of block in aggregate response. Used during streaming."""
366
386
 
367
387
  extras: NotRequired[dict[str, Any]]
368
388
  """Provider-specific metadata."""
369
389
 
370
390
 
371
- # Note: These are not standard tool calls, but rather provider-specific built-in tools.
372
- # Web search
373
391
  class WebSearchCall(TypedDict):
374
392
  """Built-in web search tool call."""
375
393
 
@@ -377,16 +395,18 @@ class WebSearchCall(TypedDict):
377
395
  """Type of the content block. Used for discrimination."""
378
396
 
379
397
  id: NotRequired[str]
380
- """Content block identifier. Either:
398
+ """Content block identifier.
381
399
 
400
+ Either:
382
401
  - Generated by the provider (e.g., OpenAI's file ID)
383
402
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
403
+
384
404
  """
385
405
 
386
406
  query: NotRequired[str]
387
407
  """The search query used in the web search tool call."""
388
408
 
389
- index: NotRequired[int]
409
+ index: NotRequired[Union[int, str]]
390
410
  """Index of block in aggregate response. Used during streaming."""
391
411
 
392
412
  extras: NotRequired[dict[str, Any]]
@@ -400,16 +420,18 @@ class WebSearchResult(TypedDict):
400
420
  """Type of the content block. Used for discrimination."""
401
421
 
402
422
  id: NotRequired[str]
403
- """Content block identifier. Either:
423
+ """Content block identifier.
404
424
 
425
+ Either:
405
426
  - Generated by the provider (e.g., OpenAI's file ID)
406
427
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
428
+
407
429
  """
408
430
 
409
431
  urls: NotRequired[list[str]]
410
432
  """List of URLs returned by the web search tool call."""
411
433
 
412
- index: NotRequired[int]
434
+ index: NotRequired[Union[int, str]]
413
435
  """Index of block in aggregate response. Used during streaming."""
414
436
 
415
437
  extras: NotRequired[dict[str, Any]]
@@ -423,10 +445,12 @@ class CodeInterpreterCall(TypedDict):
423
445
  """Type of the content block. Used for discrimination."""
424
446
 
425
447
  id: NotRequired[str]
426
- """Content block identifier. Either:
448
+ """Content block identifier.
427
449
 
450
+ Either:
428
451
  - Generated by the provider (e.g., OpenAI's file ID)
429
452
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
453
+
430
454
  """
431
455
 
432
456
  language: NotRequired[str]
@@ -435,7 +459,7 @@ class CodeInterpreterCall(TypedDict):
435
459
  code: NotRequired[str]
436
460
  """The code to be executed by the code interpreter."""
437
461
 
438
- index: NotRequired[int]
462
+ index: NotRequired[Union[int, str]]
439
463
  """Index of block in aggregate response. Used during streaming."""
440
464
 
441
465
  extras: NotRequired[dict[str, Any]]
@@ -447,22 +471,26 @@ class CodeInterpreterOutput(TypedDict):
447
471
 
448
472
  Full output of a code interpreter tool call is represented by
449
473
  ``CodeInterpreterResult`` which is a list of these blocks.
474
+
450
475
  """
451
476
 
452
477
  type: Literal["code_interpreter_output"]
453
478
  """Type of the content block. Used for discrimination."""
454
479
 
455
480
  id: NotRequired[str]
456
- """Content block identifier. Either:
481
+ """Content block identifier.
457
482
 
483
+ Either:
458
484
  - Generated by the provider (e.g., OpenAI's file ID)
459
485
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
486
+
460
487
  """
461
488
 
462
489
  return_code: NotRequired[int]
463
490
  """Return code of the executed code.
464
491
 
465
492
  Example: ``0`` for success, non-zero for failure.
493
+
466
494
  """
467
495
 
468
496
  stderr: NotRequired[str]
@@ -474,12 +502,6 @@ class CodeInterpreterOutput(TypedDict):
474
502
  file_ids: NotRequired[list[str]]
475
503
  """List of file IDs generated by the code interpreter."""
476
504
 
477
- index: NotRequired[int]
478
- """Index of block in aggregate response. Used during streaming."""
479
-
480
- extras: NotRequired[dict[str, Any]]
481
- """Provider-specific metadata."""
482
-
483
505
 
484
506
  class CodeInterpreterResult(TypedDict):
485
507
  """Result of a code interpreter tool call."""
@@ -488,16 +510,18 @@ class CodeInterpreterResult(TypedDict):
488
510
  """Type of the content block. Used for discrimination."""
489
511
 
490
512
  id: NotRequired[str]
491
- """Content block identifier. Either:
513
+ """Content block identifier.
492
514
 
515
+ Either:
493
516
  - Generated by the provider (e.g., OpenAI's file ID)
494
517
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
518
+
495
519
  """
496
520
 
497
521
  output: list[CodeInterpreterOutput]
498
522
  """List of outputs from the code interpreter tool call."""
499
523
 
500
- index: NotRequired[int]
524
+ index: NotRequired[Union[int, str]]
501
525
  """Index of block in aggregate response. Used during streaming."""
502
526
 
503
527
  extras: NotRequired[dict[str, Any]]
@@ -520,10 +544,12 @@ class ReasoningContentBlock(TypedDict):
520
544
  """Type of the content block. Used for discrimination."""
521
545
 
522
546
  id: NotRequired[str]
523
- """Content block identifier. Either:
547
+ """Content block identifier.
524
548
 
549
+ Either:
525
550
  - Generated by the provider (e.g., OpenAI's file ID)
526
551
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
552
+
527
553
  """
528
554
 
529
555
  reasoning: NotRequired[str]
@@ -531,9 +557,10 @@ class ReasoningContentBlock(TypedDict):
531
557
 
532
558
  Either the thought summary or the raw reasoning text itself. This is often parsed
533
559
  from ``<think>`` tags in the model's response.
560
+
534
561
  """
535
562
 
536
- index: NotRequired[int]
563
+ index: NotRequired[Union[int, str]]
537
564
  """Index of block in aggregate response. Used during streaming."""
538
565
 
539
566
  extras: NotRequired[dict[str, Any]]
@@ -559,10 +586,12 @@ class ImageContentBlock(TypedDict):
559
586
  """Type of the content block. Used for discrimination."""
560
587
 
561
588
  id: NotRequired[str]
562
- """Content block identifier. Either:
589
+ """Content block identifier.
563
590
 
591
+ Either:
564
592
  - Generated by the provider (e.g., OpenAI's file ID)
565
593
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
594
+
566
595
  """
567
596
 
568
597
  file_id: NotRequired[str]
@@ -572,9 +601,10 @@ class ImageContentBlock(TypedDict):
572
601
  """MIME type of the image. Required for base64.
573
602
 
574
603
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#image>`__
604
+
575
605
  """
576
606
 
577
- index: NotRequired[int]
607
+ index: NotRequired[Union[int, str]]
578
608
  """Index of block in aggregate response. Used during streaming."""
579
609
 
580
610
  url: NotRequired[str]
@@ -584,7 +614,7 @@ class ImageContentBlock(TypedDict):
584
614
  """Data as a base64 string."""
585
615
 
586
616
  extras: NotRequired[dict[str, Any]]
587
- """Provider-specific metadata."""
617
+ """Provider-specific metadata. This shouldn't be used for the image data itself."""
588
618
 
589
619
 
590
620
  class VideoContentBlock(TypedDict):
@@ -603,10 +633,12 @@ class VideoContentBlock(TypedDict):
603
633
  """Type of the content block. Used for discrimination."""
604
634
 
605
635
  id: NotRequired[str]
606
- """Content block identifier. Either:
636
+ """Content block identifier.
607
637
 
638
+ Either:
608
639
  - Generated by the provider (e.g., OpenAI's file ID)
609
640
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
641
+
610
642
  """
611
643
 
612
644
  file_id: NotRequired[str]
@@ -616,9 +648,10 @@ class VideoContentBlock(TypedDict):
616
648
  """MIME type of the video. Required for base64.
617
649
 
618
650
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#video>`__
651
+
619
652
  """
620
653
 
621
- index: NotRequired[int]
654
+ index: NotRequired[Union[int, str]]
622
655
  """Index of block in aggregate response. Used during streaming."""
623
656
 
624
657
  url: NotRequired[str]
@@ -628,7 +661,7 @@ class VideoContentBlock(TypedDict):
628
661
  """Data as a base64 string."""
629
662
 
630
663
  extras: NotRequired[dict[str, Any]]
631
- """Provider-specific metadata."""
664
+ """Provider-specific metadata. This shouldn't be used for the video data itself."""
632
665
 
633
666
 
634
667
  class AudioContentBlock(TypedDict):
@@ -637,7 +670,6 @@ class AudioContentBlock(TypedDict):
637
670
  .. note::
638
671
  ``create_audio_block`` may also be used as a factory to create an
639
672
  ``AudioContentBlock``. Benefits include:
640
-
641
673
  * Automatic ID generation (when not provided)
642
674
  * Required arguments strictly validated at creation time
643
675
 
@@ -647,10 +679,12 @@ class AudioContentBlock(TypedDict):
647
679
  """Type of the content block. Used for discrimination."""
648
680
 
649
681
  id: NotRequired[str]
650
- """Content block identifier. Either:
682
+ """Content block identifier.
651
683
 
684
+ Either:
652
685
  - Generated by the provider (e.g., OpenAI's file ID)
653
686
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
687
+
654
688
  """
655
689
 
656
690
  file_id: NotRequired[str]
@@ -660,9 +694,10 @@ class AudioContentBlock(TypedDict):
660
694
  """MIME type of the audio. Required for base64.
661
695
 
662
696
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#audio>`__
697
+
663
698
  """
664
699
 
665
- index: NotRequired[int]
700
+ index: NotRequired[Union[int, str]]
666
701
  """Index of block in aggregate response. Used during streaming."""
667
702
 
668
703
  url: NotRequired[str]
@@ -672,7 +707,7 @@ class AudioContentBlock(TypedDict):
672
707
  """Data as a base64 string."""
673
708
 
674
709
  extras: NotRequired[dict[str, Any]]
675
- """Provider-specific metadata."""
710
+ """Provider-specific metadata. This shouldn't be used for the audio data itself."""
676
711
 
677
712
 
678
713
  class PlainTextContentBlock(TypedDict):
@@ -695,10 +730,12 @@ class PlainTextContentBlock(TypedDict):
695
730
  """Type of the content block. Used for discrimination."""
696
731
 
697
732
  id: NotRequired[str]
698
- """Content block identifier. Either:
733
+ """Content block identifier.
699
734
 
735
+ Either:
700
736
  - Generated by the provider (e.g., OpenAI's file ID)
701
737
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
738
+
702
739
  """
703
740
 
704
741
  file_id: NotRequired[str]
@@ -707,7 +744,7 @@ class PlainTextContentBlock(TypedDict):
707
744
  mime_type: Literal["text/plain"]
708
745
  """MIME type of the file. Required for base64."""
709
746
 
710
- index: NotRequired[int]
747
+ index: NotRequired[Union[int, str]]
711
748
  """Index of block in aggregate response. Used during streaming."""
712
749
 
713
750
  url: NotRequired[str]
@@ -726,7 +763,7 @@ class PlainTextContentBlock(TypedDict):
726
763
  """Context for the text, e.g., a description or summary of the text's content."""
727
764
 
728
765
  extras: NotRequired[dict[str, Any]]
729
- """Provider-specific metadata."""
766
+ """Provider-specific metadata. This shouldn't be used for the data itself."""
730
767
 
731
768
 
732
769
  class FileContentBlock(TypedDict):
@@ -752,10 +789,12 @@ class FileContentBlock(TypedDict):
752
789
  """Type of the content block. Used for discrimination."""
753
790
 
754
791
  id: NotRequired[str]
755
- """Content block identifier. Either:
792
+ """Content block identifier.
756
793
 
794
+ Either:
757
795
  - Generated by the provider (e.g., OpenAI's file ID)
758
796
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
797
+
759
798
  """
760
799
 
761
800
  file_id: NotRequired[str]
@@ -765,9 +804,10 @@ class FileContentBlock(TypedDict):
765
804
  """MIME type of the file. Required for base64.
766
805
 
767
806
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
807
+
768
808
  """
769
809
 
770
- index: NotRequired[int]
810
+ index: NotRequired[Union[int, str]]
771
811
  """Index of block in aggregate response. Used during streaming."""
772
812
 
773
813
  url: NotRequired[str]
@@ -777,7 +817,7 @@ class FileContentBlock(TypedDict):
777
817
  """Data as a base64 string."""
778
818
 
779
819
  extras: NotRequired[dict[str, Any]]
780
- """Provider-specific metadata."""
820
+ """Provider-specific metadata. This shouldn't be used for the file data itself."""
781
821
 
782
822
 
783
823
  # Future modalities to consider:
@@ -793,7 +833,10 @@ class NonStandardContentBlock(TypedDict):
793
833
  The purpose of this block should be to simply hold a provider-specific payload.
794
834
  If a provider's non-standard output includes reasoning and tool calls, it should be
795
835
  the adapter's job to parse that payload and emit the corresponding standard
796
- ReasoningContentBlock and ToolCallContentBlocks.
836
+ ``ReasoningContentBlock`` and ``ToolCalls``.
837
+
838
+ Has no ``extras`` field, as provider-specific data should be included in the
839
+ ``value`` field.
797
840
 
798
841
  .. note::
799
842
  ``create_non_standard_block`` may also be used as a factory to create a
@@ -808,16 +851,18 @@ class NonStandardContentBlock(TypedDict):
808
851
  """Type of the content block. Used for discrimination."""
809
852
 
810
853
  id: NotRequired[str]
811
- """Content block identifier. Either:
854
+ """Content block identifier.
812
855
 
856
+ Either:
813
857
  - Generated by the provider (e.g., OpenAI's file ID)
814
858
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``))
859
+
815
860
  """
816
861
 
817
862
  value: dict[str, Any]
818
863
  """Provider-specific data."""
819
864
 
820
- index: NotRequired[int]
865
+ index: NotRequired[Union[int, str]]
821
866
  """Index of block in aggregate response. Used during streaming."""
822
867
 
823
868
 
@@ -832,8 +877,8 @@ DataContentBlock = Union[
832
877
 
833
878
  ToolContentBlock = Union[
834
879
  ToolCall,
880
+ ToolCallChunk,
835
881
  CodeInterpreterCall,
836
- CodeInterpreterOutput,
837
882
  CodeInterpreterResult,
838
883
  WebSearchCall,
839
884
  WebSearchResult,
@@ -841,9 +886,7 @@ ToolContentBlock = Union[
841
886
 
842
887
  ContentBlock = Union[
843
888
  TextContentBlock,
844
- ToolCall,
845
889
  InvalidToolCall,
846
- ToolCallChunk,
847
890
  ReasoningContentBlock,
848
891
  NonStandardContentBlock,
849
892
  DataContentBlock,
@@ -851,135 +894,75 @@ ContentBlock = Union[
851
894
  ]
852
895
 
853
896
 
854
- def _extract_typedict_type_values(union_type: Any) -> set[str]:
855
- """Extract the values of the 'type' field from a TypedDict union type."""
856
- result: set[str] = set()
857
- for value in get_args(union_type):
858
- annotation = value.__annotations__["type"]
859
- if get_origin(annotation) is Literal:
860
- result.update(get_args(annotation))
861
- else:
862
- msg = f"{value} 'type' is not a Literal"
863
- raise ValueError(msg)
864
- return result
865
-
866
-
867
897
  KNOWN_BLOCK_TYPES = {
898
+ # Text output
868
899
  "text",
869
- "text-plain",
900
+ "reasoning",
901
+ # Tools
870
902
  "tool_call",
871
903
  "invalid_tool_call",
872
904
  "tool_call_chunk",
873
- "reasoning",
874
- "non_standard",
905
+ # Multimodal data
875
906
  "image",
876
907
  "audio",
877
908
  "file",
909
+ "text-plain",
878
910
  "video",
911
+ # Server-side tool calls
879
912
  "code_interpreter_call",
880
- "code_interpreter_output",
881
913
  "code_interpreter_result",
882
914
  "web_search_call",
883
915
  "web_search_result",
916
+ # Catch-all
917
+ "non_standard",
884
918
  }
885
919
 
886
920
 
921
+ def _get_data_content_block_types() -> tuple[str, ...]:
922
+ """Get type literals from DataContentBlock union members dynamically."""
923
+ data_block_types = []
924
+
925
+ for block_type in get_args(DataContentBlock):
926
+ hints = get_type_hints(block_type)
927
+ if "type" in hints:
928
+ type_annotation = hints["type"]
929
+ if hasattr(type_annotation, "__args__"):
930
+ # This is a Literal type, get the literal value
931
+ literal_value = type_annotation.__args__[0]
932
+ data_block_types.append(literal_value)
933
+
934
+ return tuple(data_block_types)
935
+
936
+
887
937
  def is_data_content_block(block: dict) -> bool:
888
- """Check if the content block is a standard data content block.
938
+ """Check if the provided content block is a standard v1 data content block.
889
939
 
890
940
  Args:
891
941
  block: The content block to check.
892
942
 
893
943
  Returns:
894
944
  True if the content block is a data content block, False otherwise.
945
+
895
946
  """
896
- return block.get("type") in (
897
- "audio",
898
- "image",
899
- "video",
900
- "file",
901
- "text-plain",
902
- ) and any(
903
- key in block
904
- for key in (
905
- "url",
906
- "base64",
907
- "file_id",
908
- "text",
909
- "source_type", # backwards compatibility
910
- )
911
- )
947
+ if block.get("type") not in _get_data_content_block_types():
948
+ return False
912
949
 
950
+ if any(key in block for key in ("url", "base64", "file_id", "text")):
951
+ return True
913
952
 
914
- def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
915
- """Convert image content block to format expected by OpenAI Chat Completions API."""
916
- if "url" in block:
917
- return {
918
- "type": "image_url",
919
- "image_url": {
920
- "url": block["url"],
921
- },
922
- }
923
- if "base64" in block or block.get("source_type") == "base64":
924
- if "mime_type" not in block:
925
- error_message = "mime_type key is required for base64 data."
926
- raise ValueError(error_message)
927
- mime_type = block["mime_type"]
928
- base64_data = block["data"] if "data" in block else block["base64"]
929
- return {
930
- "type": "image_url",
931
- "image_url": {
932
- "url": f"data:{mime_type};base64,{base64_data}",
933
- },
934
- }
935
- error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
936
- raise ValueError(error_message)
937
-
938
-
939
- def convert_to_openai_data_block(block: dict) -> dict:
940
- """Format standard data content block to format expected by OpenAI."""
941
- if block["type"] == "image":
942
- formatted_block = convert_to_openai_image_block(block)
943
-
944
- elif block["type"] == "file":
945
- if "base64" in block or block.get("source_type") == "base64":
946
- base64_data = block["data"] if "source_type" in block else block["base64"]
947
- file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"}
948
- if filename := block.get("filename"):
949
- file["filename"] = filename
950
- elif (metadata := block.get("metadata")) and ("filename" in metadata):
951
- file["filename"] = metadata["filename"]
952
- else:
953
- warnings.warn(
954
- "OpenAI may require a filename for file inputs. Specify a filename "
955
- "in the content block: {'type': 'file', 'mime_type': "
956
- "'application/pdf', 'base64': '...', 'filename': 'my-pdf'}",
957
- stacklevel=1,
958
- )
959
- formatted_block = {"type": "file", "file": file}
960
- elif "file_id" in block or block.get("source_type") == "id":
961
- file_id = block["id"] if "source_type" in block else block["file_id"]
962
- formatted_block = {"type": "file", "file": {"file_id": file_id}}
963
- else:
964
- error_msg = "Keys base64 or file_id required for file blocks."
965
- raise ValueError(error_msg)
966
-
967
- elif block["type"] == "audio":
968
- if "base64" in block or block.get("source_type") == "base64":
969
- base64_data = block["data"] if "source_type" in block else block["base64"]
970
- audio_format = block["mime_type"].split("/")[-1]
971
- formatted_block = {
972
- "type": "input_audio",
973
- "input_audio": {"data": base64_data, "format": audio_format},
974
- }
975
- else:
976
- error_msg = "Key base64 is required for audio blocks."
977
- raise ValueError(error_msg)
978
- else:
979
- error_msg = f"Block of type {block['type']} is not supported."
980
- raise ValueError(error_msg)
953
+ # Verify data presence based on source type
954
+ if "source_type" in block:
955
+ source_type = block["source_type"]
956
+ if (source_type == "url" and "url" in block) or (
957
+ source_type == "base64" and "data" in block
958
+ ):
959
+ return True
960
+ if (source_type == "id" and "id" in block) or (
961
+ source_type == "text" and "url" in block
962
+ ):
963
+ return True
981
964
 
982
- return formatted_block
965
+ return False
983
966
 
984
967
 
985
968
  def create_text_block(
@@ -987,14 +970,15 @@ def create_text_block(
987
970
  *,
988
971
  id: Optional[str] = None,
989
972
  annotations: Optional[list[Annotation]] = None,
990
- index: Optional[int] = None,
973
+ index: Optional[Union[int, str]] = None,
974
+ **kwargs: Any,
991
975
  ) -> TextContentBlock:
992
976
  """Create a ``TextContentBlock``.
993
977
 
994
978
  Args:
995
979
  text: The text content of the block.
996
980
  id: Content block identifier. Generated automatically if not provided.
997
- annotations: Citations and other annotations for the text.
981
+ annotations: ``Citation``s and other annotations for the text.
998
982
  index: Index of block in aggregate response. Used during streaming.
999
983
 
1000
984
  Returns:
@@ -1008,12 +992,17 @@ def create_text_block(
1008
992
  block = TextContentBlock(
1009
993
  type="text",
1010
994
  text=text,
1011
- id=_ensure_id(id),
995
+ id=ensure_id(id),
1012
996
  )
1013
997
  if annotations is not None:
1014
998
  block["annotations"] = annotations
1015
999
  if index is not None:
1016
1000
  block["index"] = index
1001
+
1002
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1003
+ if extras:
1004
+ block["extras"] = extras
1005
+
1017
1006
  return block
1018
1007
 
1019
1008
 
@@ -1024,7 +1013,8 @@ def create_image_block(
1024
1013
  file_id: Optional[str] = None,
1025
1014
  mime_type: Optional[str] = None,
1026
1015
  id: Optional[str] = None,
1027
- index: Optional[int] = None,
1016
+ index: Optional[Union[int, str]] = None,
1017
+ **kwargs: Any,
1028
1018
  ) -> ImageContentBlock:
1029
1019
  """Create an ``ImageContentBlock``.
1030
1020
 
@@ -1052,11 +1042,7 @@ def create_image_block(
1052
1042
  msg = "Must provide one of: url, base64, or file_id"
1053
1043
  raise ValueError(msg)
1054
1044
 
1055
- if base64 and not mime_type:
1056
- msg = "mime_type is required when using base64 data"
1057
- raise ValueError(msg)
1058
-
1059
- block = ImageContentBlock(type="image", id=_ensure_id(id))
1045
+ block = ImageContentBlock(type="image", id=ensure_id(id))
1060
1046
 
1061
1047
  if url is not None:
1062
1048
  block["url"] = url
@@ -1069,6 +1055,10 @@ def create_image_block(
1069
1055
  if index is not None:
1070
1056
  block["index"] = index
1071
1057
 
1058
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1059
+ if extras:
1060
+ block["extras"] = extras
1061
+
1072
1062
  return block
1073
1063
 
1074
1064
 
@@ -1079,7 +1069,8 @@ def create_video_block(
1079
1069
  file_id: Optional[str] = None,
1080
1070
  mime_type: Optional[str] = None,
1081
1071
  id: Optional[str] = None,
1082
- index: Optional[int] = None,
1072
+ index: Optional[Union[int, str]] = None,
1073
+ **kwargs: Any,
1083
1074
  ) -> VideoContentBlock:
1084
1075
  """Create a ``VideoContentBlock``.
1085
1076
 
@@ -1111,7 +1102,7 @@ def create_video_block(
1111
1102
  msg = "mime_type is required when using base64 data"
1112
1103
  raise ValueError(msg)
1113
1104
 
1114
- block = VideoContentBlock(type="video", id=_ensure_id(id))
1105
+ block = VideoContentBlock(type="video", id=ensure_id(id))
1115
1106
 
1116
1107
  if url is not None:
1117
1108
  block["url"] = url
@@ -1124,6 +1115,10 @@ def create_video_block(
1124
1115
  if index is not None:
1125
1116
  block["index"] = index
1126
1117
 
1118
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1119
+ if extras:
1120
+ block["extras"] = extras
1121
+
1127
1122
  return block
1128
1123
 
1129
1124
 
@@ -1134,7 +1129,8 @@ def create_audio_block(
1134
1129
  file_id: Optional[str] = None,
1135
1130
  mime_type: Optional[str] = None,
1136
1131
  id: Optional[str] = None,
1137
- index: Optional[int] = None,
1132
+ index: Optional[Union[int, str]] = None,
1133
+ **kwargs: Any,
1138
1134
  ) -> AudioContentBlock:
1139
1135
  """Create an ``AudioContentBlock``.
1140
1136
 
@@ -1166,7 +1162,7 @@ def create_audio_block(
1166
1162
  msg = "mime_type is required when using base64 data"
1167
1163
  raise ValueError(msg)
1168
1164
 
1169
- block = AudioContentBlock(type="audio", id=_ensure_id(id))
1165
+ block = AudioContentBlock(type="audio", id=ensure_id(id))
1170
1166
 
1171
1167
  if url is not None:
1172
1168
  block["url"] = url
@@ -1179,6 +1175,10 @@ def create_audio_block(
1179
1175
  if index is not None:
1180
1176
  block["index"] = index
1181
1177
 
1178
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1179
+ if extras:
1180
+ block["extras"] = extras
1181
+
1182
1182
  return block
1183
1183
 
1184
1184
 
@@ -1189,7 +1189,8 @@ def create_file_block(
1189
1189
  file_id: Optional[str] = None,
1190
1190
  mime_type: Optional[str] = None,
1191
1191
  id: Optional[str] = None,
1192
- index: Optional[int] = None,
1192
+ index: Optional[Union[int, str]] = None,
1193
+ **kwargs: Any,
1193
1194
  ) -> FileContentBlock:
1194
1195
  """Create a ``FileContentBlock``.
1195
1196
 
@@ -1221,7 +1222,7 @@ def create_file_block(
1221
1222
  msg = "mime_type is required when using base64 data"
1222
1223
  raise ValueError(msg)
1223
1224
 
1224
- block = FileContentBlock(type="file", id=_ensure_id(id))
1225
+ block = FileContentBlock(type="file", id=ensure_id(id))
1225
1226
 
1226
1227
  if url is not None:
1227
1228
  block["url"] = url
@@ -1234,19 +1235,23 @@ def create_file_block(
1234
1235
  if index is not None:
1235
1236
  block["index"] = index
1236
1237
 
1238
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1239
+ if extras:
1240
+ block["extras"] = extras
1241
+
1237
1242
  return block
1238
1243
 
1239
1244
 
1240
1245
  def create_plaintext_block(
1241
- text: str,
1242
- *,
1246
+ text: Optional[str] = None,
1243
1247
  url: Optional[str] = None,
1244
1248
  base64: Optional[str] = None,
1245
1249
  file_id: Optional[str] = None,
1246
1250
  title: Optional[str] = None,
1247
1251
  context: Optional[str] = None,
1248
1252
  id: Optional[str] = None,
1249
- index: Optional[int] = None,
1253
+ index: Optional[Union[int, str]] = None,
1254
+ **kwargs: Any,
1250
1255
  ) -> PlainTextContentBlock:
1251
1256
  """Create a ``PlainTextContentBlock``.
1252
1257
 
@@ -1271,10 +1276,11 @@ def create_plaintext_block(
1271
1276
  block = PlainTextContentBlock(
1272
1277
  type="text-plain",
1273
1278
  mime_type="text/plain",
1274
- text=text,
1275
- id=_ensure_id(id),
1279
+ id=ensure_id(id),
1276
1280
  )
1277
1281
 
1282
+ if text is not None:
1283
+ block["text"] = text
1278
1284
  if url is not None:
1279
1285
  block["url"] = url
1280
1286
  if base64 is not None:
@@ -1288,6 +1294,10 @@ def create_plaintext_block(
1288
1294
  if index is not None:
1289
1295
  block["index"] = index
1290
1296
 
1297
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1298
+ if extras:
1299
+ block["extras"] = extras
1300
+
1291
1301
  return block
1292
1302
 
1293
1303
 
@@ -1296,7 +1306,8 @@ def create_tool_call(
1296
1306
  args: dict[str, Any],
1297
1307
  *,
1298
1308
  id: Optional[str] = None,
1299
- index: Optional[int] = None,
1309
+ index: Optional[Union[int, str]] = None,
1310
+ **kwargs: Any,
1300
1311
  ) -> ToolCall:
1301
1312
  """Create a ``ToolCall``.
1302
1313
 
@@ -1318,19 +1329,24 @@ def create_tool_call(
1318
1329
  type="tool_call",
1319
1330
  name=name,
1320
1331
  args=args,
1321
- id=_ensure_id(id),
1332
+ id=ensure_id(id),
1322
1333
  )
1323
1334
 
1324
1335
  if index is not None:
1325
1336
  block["index"] = index
1326
1337
 
1338
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1339
+ if extras:
1340
+ block["extras"] = extras
1341
+
1327
1342
  return block
1328
1343
 
1329
1344
 
1330
1345
  def create_reasoning_block(
1331
1346
  reasoning: Optional[str] = None,
1332
1347
  id: Optional[str] = None,
1333
- index: Optional[int] = None,
1348
+ index: Optional[Union[int, str]] = None,
1349
+ **kwargs: Any,
1334
1350
  ) -> ReasoningContentBlock:
1335
1351
  """Create a ``ReasoningContentBlock``.
1336
1352
 
@@ -1350,12 +1366,16 @@ def create_reasoning_block(
1350
1366
  block = ReasoningContentBlock(
1351
1367
  type="reasoning",
1352
1368
  reasoning=reasoning or "",
1353
- id=_ensure_id(id),
1369
+ id=ensure_id(id),
1354
1370
  )
1355
1371
 
1356
1372
  if index is not None:
1357
1373
  block["index"] = index
1358
1374
 
1375
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1376
+ if extras:
1377
+ block["extras"] = extras
1378
+
1359
1379
  return block
1360
1380
 
1361
1381
 
@@ -1367,6 +1387,7 @@ def create_citation(
1367
1387
  end_index: Optional[int] = None,
1368
1388
  cited_text: Optional[str] = None,
1369
1389
  id: Optional[str] = None,
1390
+ **kwargs: Any,
1370
1391
  ) -> Citation:
1371
1392
  """Create a ``Citation``.
1372
1393
 
@@ -1386,7 +1407,7 @@ def create_citation(
1386
1407
  prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1387
1408
 
1388
1409
  """
1389
- block = Citation(type="citation", id=_ensure_id(id))
1410
+ block = Citation(type="citation", id=ensure_id(id))
1390
1411
 
1391
1412
  if url is not None:
1392
1413
  block["url"] = url
@@ -1399,6 +1420,10 @@ def create_citation(
1399
1420
  if cited_text is not None:
1400
1421
  block["cited_text"] = cited_text
1401
1422
 
1423
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1424
+ if extras:
1425
+ block["extras"] = extras
1426
+
1402
1427
  return block
1403
1428
 
1404
1429
 
@@ -1406,7 +1431,7 @@ def create_non_standard_block(
1406
1431
  value: dict[str, Any],
1407
1432
  *,
1408
1433
  id: Optional[str] = None,
1409
- index: Optional[int] = None,
1434
+ index: Optional[Union[int, str]] = None,
1410
1435
  ) -> NonStandardContentBlock:
1411
1436
  """Create a ``NonStandardContentBlock``.
1412
1437
 
@@ -1426,7 +1451,7 @@ def create_non_standard_block(
1426
1451
  block = NonStandardContentBlock(
1427
1452
  type="non_standard",
1428
1453
  value=value,
1429
- id=_ensure_id(id),
1454
+ id=ensure_id(id),
1430
1455
  )
1431
1456
 
1432
1457
  if index is not None: