langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic. Click here for more details.

Files changed (74)
  1. langchain_core/_api/beta_decorator.py +2 -2
  2. langchain_core/_api/deprecation.py +1 -1
  3. langchain_core/beta/runnables/context.py +1 -1
  4. langchain_core/callbacks/base.py +14 -23
  5. langchain_core/callbacks/file.py +13 -2
  6. langchain_core/callbacks/manager.py +74 -157
  7. langchain_core/callbacks/streaming_stdout.py +3 -4
  8. langchain_core/callbacks/usage.py +2 -12
  9. langchain_core/chat_history.py +6 -6
  10. langchain_core/documents/base.py +1 -1
  11. langchain_core/documents/compressor.py +9 -6
  12. langchain_core/indexing/base.py +2 -2
  13. langchain_core/language_models/_utils.py +230 -101
  14. langchain_core/language_models/base.py +35 -23
  15. langchain_core/language_models/chat_models.py +245 -53
  16. langchain_core/language_models/fake_chat_models.py +28 -81
  17. langchain_core/load/dump.py +3 -4
  18. langchain_core/messages/__init__.py +38 -22
  19. langchain_core/messages/ai.py +188 -30
  20. langchain_core/messages/base.py +164 -25
  21. langchain_core/messages/block_translators/__init__.py +89 -0
  22. langchain_core/messages/block_translators/anthropic.py +451 -0
  23. langchain_core/messages/block_translators/bedrock.py +45 -0
  24. langchain_core/messages/block_translators/bedrock_converse.py +47 -0
  25. langchain_core/messages/block_translators/google_genai.py +45 -0
  26. langchain_core/messages/block_translators/google_vertexai.py +47 -0
  27. langchain_core/messages/block_translators/groq.py +45 -0
  28. langchain_core/messages/block_translators/langchain_v0.py +297 -0
  29. langchain_core/messages/block_translators/ollama.py +45 -0
  30. langchain_core/messages/block_translators/openai.py +586 -0
  31. langchain_core/messages/{content_blocks.py → content.py} +346 -213
  32. langchain_core/messages/human.py +29 -9
  33. langchain_core/messages/system.py +29 -9
  34. langchain_core/messages/tool.py +94 -13
  35. langchain_core/messages/utils.py +32 -234
  36. langchain_core/output_parsers/base.py +14 -50
  37. langchain_core/output_parsers/json.py +2 -5
  38. langchain_core/output_parsers/list.py +2 -7
  39. langchain_core/output_parsers/openai_functions.py +5 -28
  40. langchain_core/output_parsers/openai_tools.py +49 -90
  41. langchain_core/output_parsers/pydantic.py +2 -3
  42. langchain_core/output_parsers/transform.py +12 -53
  43. langchain_core/output_parsers/xml.py +9 -17
  44. langchain_core/prompt_values.py +8 -112
  45. langchain_core/prompts/chat.py +1 -3
  46. langchain_core/runnables/base.py +500 -451
  47. langchain_core/runnables/branch.py +1 -1
  48. langchain_core/runnables/fallbacks.py +4 -4
  49. langchain_core/runnables/history.py +1 -1
  50. langchain_core/runnables/passthrough.py +3 -3
  51. langchain_core/runnables/retry.py +1 -1
  52. langchain_core/runnables/router.py +1 -1
  53. langchain_core/structured_query.py +3 -7
  54. langchain_core/tools/base.py +14 -41
  55. langchain_core/tools/convert.py +2 -22
  56. langchain_core/tools/retriever.py +1 -8
  57. langchain_core/tools/structured.py +2 -10
  58. langchain_core/tracers/_streaming.py +6 -7
  59. langchain_core/tracers/base.py +7 -14
  60. langchain_core/tracers/core.py +4 -27
  61. langchain_core/tracers/event_stream.py +4 -15
  62. langchain_core/tracers/langchain.py +3 -14
  63. langchain_core/tracers/log_stream.py +2 -3
  64. langchain_core/utils/_merge.py +45 -7
  65. langchain_core/utils/function_calling.py +22 -9
  66. langchain_core/utils/utils.py +29 -0
  67. langchain_core/version.py +1 -1
  68. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/METADATA +7 -9
  69. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/RECORD +71 -64
  70. langchain_core/v1/__init__.py +0 -1
  71. langchain_core/v1/chat_models.py +0 -1047
  72. langchain_core/v1/messages.py +0 -755
  73. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/WHEEL +0 -0
  74. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a1.dist-info}/entry_points.txt +0 -0
@@ -4,132 +4,144 @@
4
4
  This module is under active development. The API is unstable and subject to
5
5
  change in future releases.
6
6
 
7
- This module provides a standardized data structure for representing inputs to and
8
- outputs from Large Language Models. The core abstraction is the **Content Block**, a
9
- ``TypedDict`` that can represent a piece of text, an image, a tool call, or other
10
- structured data.
7
+ This module provides standardized data structures for representing inputs to and
8
+ outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict``.
9
+
10
+ **Rationale**
11
+
12
+ Different LLM providers use distinct and incompatible API schemas. This module
13
+ provides a unified, provider-agnostic format to facilitate these interactions. A
14
+ message to or from a model is simply a list of content blocks, allowing for the natural
15
+ interleaving of text, images, and other content in a single ordered sequence.
16
+
17
+ An adapter for a specific provider is responsible for translating this standard list of
18
+ blocks into the format required by its API.
19
+
20
+ **Extensibility**
11
21
 
12
22
  Data **not yet mapped** to a standard block may be represented using the
13
23
  ``NonStandardContentBlock``, which allows for provider-specific data to be included
14
24
  without losing the benefits of type checking and validation.
15
25
 
16
26
  Furthermore, provider-specific fields **within** a standard block are fully supported
17
- by default. However, since current type checkers do not recognize this, we are temporarily
18
- applying type ignore comments to suppress warnings. In the future,
19
- `PEP 728 <https://peps.python.org/pep-0728/>`__ will add an extra param, ``extra_items=Any``.
20
- When this is supported, we will apply it to block signatures to signify to type checkers
21
- that additional provider-specific fields are allowed.
22
-
23
- **Example with PEP 728 provider-specific fields:**
27
+ by default in the ``extras`` field of each block. This allows for additional metadata
28
+ to be included without breaking the standard structure.
24
29
 
25
- .. code-block:: python
30
+ .. warning::
31
+ Do not heavily rely on the ``extras`` field for provider-specific data! This field
32
+ is subject to deprecation in future releases as we move towards PEP 728.
26
33
 
27
- # Note `extra_items=Any`
28
- class TextContentBlock(TypedDict, extra_items=Any):
29
- type: Literal["text"]
30
- id: NotRequired[str]
31
- text: str
32
- annotations: NotRequired[list[Annotation]]
33
- index: NotRequired[int]
34
+ .. note::
35
+ Following widespread adoption of `PEP 728 <https://peps.python.org/pep-0728/>`__, we
36
+ will add ``extra_items=Any`` as a param to Content Blocks. This will signify to type
37
+ checkers that additional provider-specific fields are allowed outside of the
38
+ ``extras`` field, and that will become the new standard approach to adding
39
+ provider-specific metadata.
34
40
 
35
- .. code-block:: python
41
+ .. dropdown::
36
42
 
37
- from langchain_core.messages.content_blocks import TextContentBlock
43
+ **Example with PEP 728 provider-specific fields:**
38
44
 
39
- my_block: TextContentBlock = {
40
- # Add required fields
41
- "type": "text",
42
- "text": "Hello, world!",
43
- # Additional fields not specified in the TypedDict
44
- # These are valid with PEP 728 and are typed as Any
45
- "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
46
- "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
47
- "custom_field": "any value",
48
- }
45
+ .. code-block:: python
49
46
 
50
- openai_data = my_block["openai_metadata"] # Type: Any
47
+ # Content block definition
48
+ # NOTE: `extra_items=Any`
49
+ class TextContentBlock(TypedDict, extra_items=Any):
50
+ type: Literal["text"]
51
+ id: NotRequired[str]
52
+ text: str
53
+ annotations: NotRequired[list[Annotation]]
54
+ index: NotRequired[int]
51
55
 
52
- .. note::
53
- PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings
54
- from type checkers that don't yet support it. The functionality works correctly
55
- in Python 3.13+ and will be fully supported as the ecosystem catches up.
56
+ .. code-block:: python
56
57
 
57
- **Rationale**
58
+ from langchain_core.messages.content import TextContentBlock
59
+
60
+ # Create a text content block with provider-specific fields
61
+ my_block: TextContentBlock = {
62
+ # Add required fields
63
+ "type": "text",
64
+ "text": "Hello, world!",
65
+ # Additional fields not specified in the TypedDict
66
+ # These are valid with PEP 728 and are typed as Any
67
+ "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
68
+ "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
69
+ "custom_field": "any value",
70
+ }
58
71
 
59
- Different LLM providers use distinct and incompatible API schemas. This module
60
- introduces a unified, provider-agnostic format to standardize these interactions. A
61
- message to or from a model is simply a `list` of `ContentBlock` objects, allowing for
62
- the natural interleaving of text, images, and other content in a single, ordered
63
- sequence.
72
+ # Reading a provider-specific field from an existing block
73
+ openai_data = my_block["openai_metadata"] # Type: Any
64
74
 
65
- An adapter for a specific provider is responsible for translating this standard list of
66
- blocks into the format required by its API.
75
+ PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress
76
+ warnings from type checkers that don't yet support it. The functionality works
77
+ correctly in Python 3.13+ and will be fully supported as the ecosystem catches
78
+ up.
67
79
 
68
80
  **Key Block Types**
69
81
 
70
82
  The module defines several types of content blocks, including:
71
83
 
72
- - ``TextContentBlock``: Standard text.
73
- - ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data.
74
- - ``ToolCallContentBlock``, ``ToolOutputContentBlock``: For function calling.
84
+ - ``TextContentBlock``: Standard text output.
85
+ - ``Citation``: For annotations that link text output to a source document.
86
+ - ``ToolCallContentBlock``: For function calling.
75
87
  - ``ReasoningContentBlock``: To capture a model's thought process.
76
- - ``Citation``: For annotations that link generated text to a source document.
88
+ - Multimodal data:
89
+ - ``ImageContentBlock``
90
+ - ``AudioContentBlock``
91
+ - ``VideoContentBlock``
92
+ - ``PlainTextContentBlock`` (e.g. .txt or .md files)
93
+ - ``FileContentBlock`` (e.g. PDFs, etc.)
77
94
 
78
95
  **Example Usage**
79
96
 
80
97
  .. code-block:: python
81
98
 
82
99
  # Direct construction:
83
- from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock
84
-
85
- multimodal_message: AIMessage = [
86
- TextContentBlock(type="text", text="What is shown in this image?"),
87
- ImageContentBlock(
88
- type="image",
89
- url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
90
- mime_type="image/png",
91
- ),
92
- ]
93
-
94
- from langchain_core.messages.content_blocks import create_text_block, create_image_block
95
-
96
- # Using factory functions:
97
- multimodal_message: AIMessage = [
98
- create_text_block("What is shown in this image?"),
99
- create_image_block(
100
- url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
101
- mime_type="image/png",
102
- ),
103
- ]
104
- """ # noqa: E501
105
-
106
- import warnings
107
- from typing import Any, Literal, Optional, Union
108
- from uuid import uuid4
100
+ from langchain_core.messages.content import TextContentBlock, ImageContentBlock
101
+
102
+ multimodal_message = AIMessage(content_blocks=
103
+ [
104
+ TextContentBlock(type="text", text="What is shown in this image?"),
105
+ ImageContentBlock(
106
+ type="image",
107
+ url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
108
+ mime_type="image/png",
109
+ ),
110
+ ]
111
+ )
109
112
 
110
- from typing_extensions import NotRequired, TypedDict, get_args, get_origin
113
+ # Using factories:
114
+ from langchain_core.messages.content import create_text_block, create_image_block
115
+
116
+ multimodal_message = AIMessage(content=
117
+ [
118
+ create_text_block("What is shown in this image?"),
119
+ create_image_block(
120
+ url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
121
+ mime_type="image/png",
122
+ ),
123
+ ]
124
+ )
111
125
 
126
+ Factory functions offer benefits such as:
127
+ - Automatic ID generation (when not provided)
128
+ - No need to manually specify the ``type`` field
112
129
 
113
- def _ensure_id(id_val: Optional[str]) -> str:
114
- """Ensure the ID is a valid string, generating a new UUID if not provided.
130
+ """
115
131
 
116
- Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
117
- LangChain-generated IDs.
132
+ import warnings
133
+ from typing import Any, Literal, Optional, Union, get_args, get_type_hints
118
134
 
119
- Args:
120
- id_val: Optional string ID value to validate.
135
+ from typing_extensions import NotRequired, TypedDict, TypeGuard
121
136
 
122
- Returns:
123
- A valid string ID, either the provided value or a new UUID.
124
- """
125
- return id_val or str(f"lc_{uuid4()}")
137
+ from langchain_core.utils.utils import ensure_id
126
138
 
127
139
 
128
140
  class Citation(TypedDict):
129
141
  """Annotation for citing data from a document.
130
142
 
131
143
  .. note::
132
- ``start/end`` indices refer to the **response text**,
144
+ ``start``/``end`` indices refer to the **response text**,
133
145
  not the source text. This means that the indices are relative to the model's
134
146
  response, not the original document (as specified in the ``url``).
135
147
 
@@ -150,18 +162,12 @@ class Citation(TypedDict):
150
162
 
151
163
  - Generated by the provider (e.g., OpenAI's file ID)
152
164
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
165
+
153
166
  """
154
167
 
155
168
  url: NotRequired[str]
156
169
  """URL of the document source."""
157
170
 
158
- # For future consideration, if needed:
159
- # provenance: NotRequired[str]
160
- # """Provenance of the document, e.g., "Wikipedia", "arXiv", etc.
161
-
162
- # Included for future compatibility; not currently implemented.
163
- # """
164
-
165
171
  title: NotRequired[str]
166
172
  """Source document title.
167
173
 
@@ -169,12 +175,10 @@ class Citation(TypedDict):
169
175
  """
170
176
 
171
177
  start_index: NotRequired[int]
172
- """Start index of the **response text** (``TextContentBlock.text``) for which the
173
- annotation applies."""
178
+ """Start index of the **response text** (``TextContentBlock.text``)."""
174
179
 
175
180
  end_index: NotRequired[int]
176
- """End index of the **response text** (``TextContentBlock.text``) for which the
177
- annotation applies."""
181
+ """End index of the **response text** (``TextContentBlock.text``)."""
178
182
 
179
183
  cited_text: NotRequired[str]
180
184
  """Excerpt of source text being cited."""
@@ -196,10 +200,12 @@ class NonStandardAnnotation(TypedDict):
196
200
  """Type of the content block. Used for discrimination."""
197
201
 
198
202
  id: NotRequired[str]
199
- """Content block identifier. Either:
203
+ """Content block identifier.
200
204
 
205
+ Either:
201
206
  - Generated by the provider (e.g., OpenAI's file ID)
202
207
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
208
+
203
209
  """
204
210
 
205
211
  value: dict[str, Any]
@@ -228,19 +234,21 @@ class TextContentBlock(TypedDict):
228
234
  """Type of the content block. Used for discrimination."""
229
235
 
230
236
  id: NotRequired[str]
231
- """Content block identifier. Either:
237
+ """Content block identifier.
232
238
 
239
+ Either:
233
240
  - Generated by the provider (e.g., OpenAI's file ID)
234
241
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
242
+
235
243
  """
236
244
 
237
245
  text: str
238
246
  """Block text."""
239
247
 
240
248
  annotations: NotRequired[list[Annotation]]
241
- """Citations and other annotations."""
249
+ """``Citation`` objects and other annotations."""
242
250
 
243
- index: NotRequired[int]
251
+ index: NotRequired[Union[int, str]]
244
252
  """Index of block in aggregate response. Used during streaming."""
245
253
 
246
254
  extras: NotRequired[dict[str, Any]]
@@ -280,6 +288,7 @@ class ToolCall(TypedDict):
280
288
 
281
289
  An identifier is needed to associate a tool call request with a tool
282
290
  call result in events when multiple concurrent tool calls are made.
291
+
283
292
  """
284
293
  # TODO: Consider making this NotRequired[str] in the future.
285
294
 
@@ -289,7 +298,7 @@ class ToolCall(TypedDict):
289
298
  args: dict[str, Any]
290
299
  """The arguments to the tool call."""
291
300
 
292
- index: NotRequired[int]
301
+ index: NotRequired[Union[int, str]]
293
302
  """Index of block in aggregate response. Used during streaming."""
294
303
 
295
304
  extras: NotRequired[dict[str, Any]]
@@ -299,7 +308,7 @@ class ToolCall(TypedDict):
299
308
  class ToolCallChunk(TypedDict):
300
309
  """A chunk of a tool call (e.g., as part of a stream).
301
310
 
302
- When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
311
+ When merging ``ToolCallChunk`` objects (e.g., via ``AIMessageChunk.__add__``),
303
312
  all string attributes are concatenated. Chunks are only merged if their
304
313
  values of ``index`` are equal and not ``None``.
305
314
 
@@ -314,15 +323,21 @@ class ToolCallChunk(TypedDict):
314
323
  AIMessageChunk(content="", tool_call_chunks=left_chunks)
315
324
  + AIMessageChunk(content="", tool_call_chunks=right_chunks)
316
325
  ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)]
326
+
317
327
  """
318
328
 
319
329
  # TODO: Consider making fields NotRequired[str] in the future.
320
330
 
321
- type: NotRequired[Literal["tool_call_chunk"]]
331
+ type: Literal["tool_call_chunk"]
322
332
  """Used for serialization."""
323
333
 
324
334
  id: Optional[str]
325
- """An identifier associated with the tool call."""
335
+ """An identifier associated with the tool call.
336
+
337
+ An identifier is needed to associate a tool call request with a tool
338
+ call result in events when multiple concurrent tool calls are made.
339
+
340
+ """
326
341
 
327
342
  name: Optional[str]
328
343
  """The name of the tool to be called."""
@@ -330,7 +345,7 @@ class ToolCallChunk(TypedDict):
330
345
  args: Optional[str]
331
346
  """The arguments to the tool call."""
332
347
 
333
- index: Optional[int]
348
+ index: NotRequired[Union[int, str]]
334
349
  """The index of the tool call in a sequence."""
335
350
 
336
351
  extras: NotRequired[dict[str, Any]]
@@ -342,6 +357,7 @@ class InvalidToolCall(TypedDict):
342
357
 
343
358
  Here we add an ``error`` key to surface errors made during generation
344
359
  (e.g., invalid JSON arguments.)
360
+
345
361
  """
346
362
 
347
363
  # TODO: Consider making fields NotRequired[str] in the future.
@@ -350,7 +366,12 @@ class InvalidToolCall(TypedDict):
350
366
  """Used for discrimination."""
351
367
 
352
368
  id: Optional[str]
353
- """An identifier associated with the tool call."""
369
+ """An identifier associated with the tool call.
370
+
371
+ An identifier is needed to associate a tool call request with a tool
372
+ call result in events when multiple concurrent tool calls are made.
373
+
374
+ """
354
375
 
355
376
  name: Optional[str]
356
377
  """The name of the tool to be called."""
@@ -361,15 +382,13 @@ class InvalidToolCall(TypedDict):
361
382
  error: Optional[str]
362
383
  """An error message associated with the tool call."""
363
384
 
364
- index: NotRequired[int]
385
+ index: NotRequired[Union[int, str]]
365
386
  """Index of block in aggregate response. Used during streaming."""
366
387
 
367
388
  extras: NotRequired[dict[str, Any]]
368
389
  """Provider-specific metadata."""
369
390
 
370
391
 
371
- # Note: These are not standard tool calls, but rather provider-specific built-in tools.
372
- # Web search
373
392
  class WebSearchCall(TypedDict):
374
393
  """Built-in web search tool call."""
375
394
 
@@ -377,16 +396,18 @@ class WebSearchCall(TypedDict):
377
396
  """Type of the content block. Used for discrimination."""
378
397
 
379
398
  id: NotRequired[str]
380
- """Content block identifier. Either:
399
+ """Content block identifier.
381
400
 
401
+ Either:
382
402
  - Generated by the provider (e.g., OpenAI's file ID)
383
403
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
404
+
384
405
  """
385
406
 
386
407
  query: NotRequired[str]
387
408
  """The search query used in the web search tool call."""
388
409
 
389
- index: NotRequired[int]
410
+ index: NotRequired[Union[int, str]]
390
411
  """Index of block in aggregate response. Used during streaming."""
391
412
 
392
413
  extras: NotRequired[dict[str, Any]]
@@ -400,16 +421,18 @@ class WebSearchResult(TypedDict):
400
421
  """Type of the content block. Used for discrimination."""
401
422
 
402
423
  id: NotRequired[str]
403
- """Content block identifier. Either:
424
+ """Content block identifier.
404
425
 
426
+ Either:
405
427
  - Generated by the provider (e.g., OpenAI's file ID)
406
428
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
429
+
407
430
  """
408
431
 
409
432
  urls: NotRequired[list[str]]
410
433
  """List of URLs returned by the web search tool call."""
411
434
 
412
- index: NotRequired[int]
435
+ index: NotRequired[Union[int, str]]
413
436
  """Index of block in aggregate response. Used during streaming."""
414
437
 
415
438
  extras: NotRequired[dict[str, Any]]
@@ -423,10 +446,12 @@ class CodeInterpreterCall(TypedDict):
423
446
  """Type of the content block. Used for discrimination."""
424
447
 
425
448
  id: NotRequired[str]
426
- """Content block identifier. Either:
449
+ """Content block identifier.
427
450
 
451
+ Either:
428
452
  - Generated by the provider (e.g., OpenAI's file ID)
429
453
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
454
+
430
455
  """
431
456
 
432
457
  language: NotRequired[str]
@@ -435,7 +460,7 @@ class CodeInterpreterCall(TypedDict):
435
460
  code: NotRequired[str]
436
461
  """The code to be executed by the code interpreter."""
437
462
 
438
- index: NotRequired[int]
463
+ index: NotRequired[Union[int, str]]
439
464
  """Index of block in aggregate response. Used during streaming."""
440
465
 
441
466
  extras: NotRequired[dict[str, Any]]
@@ -447,22 +472,26 @@ class CodeInterpreterOutput(TypedDict):
447
472
 
448
473
  Full output of a code interpreter tool call is represented by
449
474
  ``CodeInterpreterResult`` which is a list of these blocks.
475
+
450
476
  """
451
477
 
452
478
  type: Literal["code_interpreter_output"]
453
479
  """Type of the content block. Used for discrimination."""
454
480
 
455
481
  id: NotRequired[str]
456
- """Content block identifier. Either:
482
+ """Content block identifier.
457
483
 
484
+ Either:
458
485
  - Generated by the provider (e.g., OpenAI's file ID)
459
486
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
487
+
460
488
  """
461
489
 
462
490
  return_code: NotRequired[int]
463
491
  """Return code of the executed code.
464
492
 
465
493
  Example: ``0`` for success, non-zero for failure.
494
+
466
495
  """
467
496
 
468
497
  stderr: NotRequired[str]
@@ -474,12 +503,6 @@ class CodeInterpreterOutput(TypedDict):
474
503
  file_ids: NotRequired[list[str]]
475
504
  """List of file IDs generated by the code interpreter."""
476
505
 
477
- index: NotRequired[int]
478
- """Index of block in aggregate response. Used during streaming."""
479
-
480
- extras: NotRequired[dict[str, Any]]
481
- """Provider-specific metadata."""
482
-
483
506
 
484
507
  class CodeInterpreterResult(TypedDict):
485
508
  """Result of a code interpreter tool call."""
@@ -488,16 +511,18 @@ class CodeInterpreterResult(TypedDict):
488
511
  """Type of the content block. Used for discrimination."""
489
512
 
490
513
  id: NotRequired[str]
491
- """Content block identifier. Either:
514
+ """Content block identifier.
492
515
 
516
+ Either:
493
517
  - Generated by the provider (e.g., OpenAI's file ID)
494
518
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
519
+
495
520
  """
496
521
 
497
522
  output: list[CodeInterpreterOutput]
498
523
  """List of outputs from the code interpreter tool call."""
499
524
 
500
- index: NotRequired[int]
525
+ index: NotRequired[Union[int, str]]
501
526
  """Index of block in aggregate response. Used during streaming."""
502
527
 
503
528
  extras: NotRequired[dict[str, Any]]
@@ -520,10 +545,12 @@ class ReasoningContentBlock(TypedDict):
520
545
  """Type of the content block. Used for discrimination."""
521
546
 
522
547
  id: NotRequired[str]
523
- """Content block identifier. Either:
548
+ """Content block identifier.
524
549
 
550
+ Either:
525
551
  - Generated by the provider (e.g., OpenAI's file ID)
526
552
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
553
+
527
554
  """
528
555
 
529
556
  reasoning: NotRequired[str]
@@ -531,9 +558,10 @@ class ReasoningContentBlock(TypedDict):
531
558
 
532
559
  Either the thought summary or the raw reasoning text itself. This is often parsed
533
560
  from ``<think>`` tags in the model's response.
561
+
534
562
  """
535
563
 
536
- index: NotRequired[int]
564
+ index: NotRequired[Union[int, str]]
537
565
  """Index of block in aggregate response. Used during streaming."""
538
566
 
539
567
  extras: NotRequired[dict[str, Any]]
@@ -559,10 +587,12 @@ class ImageContentBlock(TypedDict):
559
587
  """Type of the content block. Used for discrimination."""
560
588
 
561
589
  id: NotRequired[str]
562
- """Content block identifier. Either:
590
+ """Content block identifier.
563
591
 
592
+ Either:
564
593
  - Generated by the provider (e.g., OpenAI's file ID)
565
594
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
595
+
566
596
  """
567
597
 
568
598
  file_id: NotRequired[str]
@@ -572,9 +602,10 @@ class ImageContentBlock(TypedDict):
572
602
  """MIME type of the image. Required for base64.
573
603
 
574
604
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#image>`__
605
+
575
606
  """
576
607
 
577
- index: NotRequired[int]
608
+ index: NotRequired[Union[int, str]]
578
609
  """Index of block in aggregate response. Used during streaming."""
579
610
 
580
611
  url: NotRequired[str]
@@ -584,7 +615,7 @@ class ImageContentBlock(TypedDict):
584
615
  """Data as a base64 string."""
585
616
 
586
617
  extras: NotRequired[dict[str, Any]]
587
- """Provider-specific metadata."""
618
+ """Provider-specific metadata. This shouldn't be used for the image data itself."""
588
619
 
589
620
 
590
621
  class VideoContentBlock(TypedDict):
@@ -603,10 +634,12 @@ class VideoContentBlock(TypedDict):
603
634
  """Type of the content block. Used for discrimination."""
604
635
 
605
636
  id: NotRequired[str]
606
- """Content block identifier. Either:
637
+ """Content block identifier.
607
638
 
639
+ Either:
608
640
  - Generated by the provider (e.g., OpenAI's file ID)
609
641
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
642
+
610
643
  """
611
644
 
612
645
  file_id: NotRequired[str]
@@ -616,9 +649,10 @@ class VideoContentBlock(TypedDict):
616
649
  """MIME type of the video. Required for base64.
617
650
 
618
651
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#video>`__
652
+
619
653
  """
620
654
 
621
- index: NotRequired[int]
655
+ index: NotRequired[Union[int, str]]
622
656
  """Index of block in aggregate response. Used during streaming."""
623
657
 
624
658
  url: NotRequired[str]
@@ -628,7 +662,7 @@ class VideoContentBlock(TypedDict):
628
662
  """Data as a base64 string."""
629
663
 
630
664
  extras: NotRequired[dict[str, Any]]
631
- """Provider-specific metadata."""
665
+ """Provider-specific metadata. This shouldn't be used for the video data itself."""
632
666
 
633
667
 
634
668
  class AudioContentBlock(TypedDict):
@@ -637,7 +671,6 @@ class AudioContentBlock(TypedDict):
637
671
  .. note::
638
672
  ``create_audio_block`` may also be used as a factory to create an
639
673
  ``AudioContentBlock``. Benefits include:
640
-
641
674
  * Automatic ID generation (when not provided)
642
675
  * Required arguments strictly validated at creation time
643
676
 
@@ -647,10 +680,12 @@ class AudioContentBlock(TypedDict):
647
680
  """Type of the content block. Used for discrimination."""
648
681
 
649
682
  id: NotRequired[str]
650
- """Content block identifier. Either:
683
+ """Content block identifier.
651
684
 
685
+ Either:
652
686
  - Generated by the provider (e.g., OpenAI's file ID)
653
687
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
688
+
654
689
  """
655
690
 
656
691
  file_id: NotRequired[str]
@@ -660,9 +695,10 @@ class AudioContentBlock(TypedDict):
660
695
  """MIME type of the audio. Required for base64.
661
696
 
662
697
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#audio>`__
698
+
663
699
  """
664
700
 
665
- index: NotRequired[int]
701
+ index: NotRequired[Union[int, str]]
666
702
  """Index of block in aggregate response. Used during streaming."""
667
703
 
668
704
  url: NotRequired[str]
@@ -672,7 +708,7 @@ class AudioContentBlock(TypedDict):
672
708
  """Data as a base64 string."""
673
709
 
674
710
  extras: NotRequired[dict[str, Any]]
675
- """Provider-specific metadata."""
711
+ """Provider-specific metadata. This shouldn't be used for the audio data itself."""
676
712
 
677
713
 
678
714
  class PlainTextContentBlock(TypedDict):
@@ -695,10 +731,12 @@ class PlainTextContentBlock(TypedDict):
695
731
  """Type of the content block. Used for discrimination."""
696
732
 
697
733
  id: NotRequired[str]
698
- """Content block identifier. Either:
734
+ """Content block identifier.
699
735
 
736
+ Either:
700
737
  - Generated by the provider (e.g., OpenAI's file ID)
701
738
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
739
+
702
740
  """
703
741
 
704
742
  file_id: NotRequired[str]
@@ -707,7 +745,7 @@ class PlainTextContentBlock(TypedDict):
707
745
  mime_type: Literal["text/plain"]
708
746
  """MIME type of the file. Required for base64."""
709
747
 
710
- index: NotRequired[int]
748
+ index: NotRequired[Union[int, str]]
711
749
  """Index of block in aggregate response. Used during streaming."""
712
750
 
713
751
  url: NotRequired[str]
@@ -726,7 +764,7 @@ class PlainTextContentBlock(TypedDict):
726
764
  """Context for the text, e.g., a description or summary of the text's content."""
727
765
 
728
766
  extras: NotRequired[dict[str, Any]]
729
- """Provider-specific metadata."""
767
+ """Provider-specific metadata. This shouldn't be used for the data itself."""
730
768
 
731
769
 
732
770
  class FileContentBlock(TypedDict):
@@ -752,10 +790,12 @@ class FileContentBlock(TypedDict):
752
790
  """Type of the content block. Used for discrimination."""
753
791
 
754
792
  id: NotRequired[str]
755
- """Content block identifier. Either:
793
+ """Content block identifier.
756
794
 
795
+ Either:
757
796
  - Generated by the provider (e.g., OpenAI's file ID)
758
797
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
798
+
759
799
  """
760
800
 
761
801
  file_id: NotRequired[str]
@@ -765,9 +805,10 @@ class FileContentBlock(TypedDict):
765
805
  """MIME type of the file. Required for base64.
766
806
 
767
807
  `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
808
+
768
809
  """
769
810
 
770
- index: NotRequired[int]
811
+ index: NotRequired[Union[int, str]]
771
812
  """Index of block in aggregate response. Used during streaming."""
772
813
 
773
814
  url: NotRequired[str]
@@ -777,7 +818,7 @@ class FileContentBlock(TypedDict):
777
818
  """Data as a base64 string."""
778
819
 
779
820
  extras: NotRequired[dict[str, Any]]
780
- """Provider-specific metadata."""
821
+ """Provider-specific metadata. This shouldn't be used for the file data itself."""
781
822
 
782
823
 
783
824
  # Future modalities to consider:
@@ -793,7 +834,10 @@ class NonStandardContentBlock(TypedDict):
793
834
  The purpose of this block should be to simply hold a provider-specific payload.
794
835
  If a provider's non-standard output includes reasoning and tool calls, it should be
795
836
  the adapter's job to parse that payload and emit the corresponding standard
796
- ReasoningContentBlock and ToolCallContentBlocks.
837
+ ``ReasoningContentBlock`` and ``ToolCallContentBlocks``.
838
+
839
+ Has no ``extras`` field, as provider-specific data should be included in the
840
+ ``value`` field.
797
841
 
798
842
  .. note::
799
843
  ``create_non_standard_block`` may also be used as a factory to create a
@@ -808,16 +852,18 @@ class NonStandardContentBlock(TypedDict):
808
852
  """Type of the content block. Used for discrimination."""
809
853
 
810
854
  id: NotRequired[str]
811
- """Content block identifier. Either:
855
+ """Content block identifier.
812
856
 
857
+ Either:
813
858
  - Generated by the provider (e.g., OpenAI's file ID)
814
859
  - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
860
+
815
861
  """
816
862
 
817
863
  value: dict[str, Any]
818
864
  """Provider-specific data."""
819
865
 
820
- index: NotRequired[int]
866
+ index: NotRequired[Union[int, str]]
821
867
  """Index of block in aggregate response. Used during streaming."""
822
868
 
823
869
 
@@ -832,8 +878,8 @@ DataContentBlock = Union[
832
878
 
833
879
  ToolContentBlock = Union[
834
880
  ToolCall,
881
+ ToolCallChunk,
835
882
  CodeInterpreterCall,
836
- CodeInterpreterOutput,
837
883
  CodeInterpreterResult,
838
884
  WebSearchCall,
839
885
  WebSearchResult,
@@ -841,9 +887,7 @@ ToolContentBlock = Union[
841
887
 
842
888
  ContentBlock = Union[
843
889
  TextContentBlock,
844
- ToolCall,
845
890
  InvalidToolCall,
846
- ToolCallChunk,
847
891
  ReasoningContentBlock,
848
892
  NonStandardContentBlock,
849
893
  DataContentBlock,
@@ -851,68 +895,106 @@ ContentBlock = Union[
851
895
  ]
852
896
 
853
897
 
854
- def _extract_typedict_type_values(union_type: Any) -> set[str]:
855
- """Extract the values of the 'type' field from a TypedDict union type."""
856
- result: set[str] = set()
857
- for value in get_args(union_type):
858
- annotation = value.__annotations__["type"]
859
- if get_origin(annotation) is Literal:
860
- result.update(get_args(annotation))
861
- else:
862
- msg = f"{value} 'type' is not a Literal"
863
- raise ValueError(msg)
864
- return result
865
-
866
-
# Every ``type`` discriminator value recognised as a standard content block.
# The values are grouped by purpose; the groups are flattened into one set.
KNOWN_BLOCK_TYPES = {
    # Text output
    *("text", "reasoning"),
    # Tools
    *("tool_call", "invalid_tool_call", "tool_call_chunk"),
    # Multimodal data
    *("image", "audio", "file", "text-plain", "video"),
    # Server-side tool calls
    *(
        "code_interpreter_call",
        "code_interpreter_result",
        "web_search_call",
        "web_search_result",
    ),
    # Catch-all
    *("non_standard",),
}
885
920
 
886
921
 
def _get_data_content_block_types() -> tuple[str, ...]:
    """Collect the ``type`` literals declared by ``DataContentBlock`` members.

    Each member of the ``DataContentBlock`` union is inspected for a ``type``
    annotation. Only ``Literal`` annotations contribute, and every value of a
    multi-valued ``Literal`` is included rather than just the first one.

    Returns:
        The literal ``type`` values, in union-member order.

    """
    # Local import: ``get_origin`` may not be part of the module-level imports.
    from typing import get_origin

    data_block_types: list[str] = []

    for block_type in get_args(DataContentBlock):
        type_annotation = get_type_hints(block_type).get("type")
        # Checking for a bare ``__args__`` attribute would also accept
        # non-Literal generics (e.g. ``Optional[str]``), so test the origin.
        if get_origin(type_annotation) is Literal:
            data_block_types.extend(get_args(type_annotation))

    return tuple(data_block_types)
936
+
937
+
def is_data_content_block(block: dict) -> bool:
    """Check if the provided content block is a standard v1 data content block.

    A block qualifies when its ``type`` is one of the ``DataContentBlock``
    type literals and it carries data, either under a v1 data key (``url``,
    ``base64``, ``file_id``, ``text``) or in the legacy v0 layout where
    ``source_type`` names the kind of payload.

    Args:
        block: The content block to check.

    Returns:
        True if the content block is a data content block, False otherwise.

    """
    if block.get("type") not in _get_data_content_block_types():
        return False

    # v1 format: the data lives directly under one of these keys.
    if any(key in block for key in ("url", "base64", "file_id", "text")):
        return True

    # v0 format: ``source_type`` names the payload kind and the payload sits
    # under the matching key. The ``url`` and ``text`` keys are already caught
    # by the v1 check above; they are listed so the mapping stays complete.
    legacy_data_keys = (
        ("url", "url"),
        ("base64", "data"),
        ("id", "id"),
        ("text", "text"),
    )
    if "source_type" in block:
        source_type = block["source_type"]
        return any(
            source_type == kind and data_key in block
            for kind, data_key in legacy_data_keys
        )

    return False
967
+
968
+
def is_tool_call_block(block: ContentBlock) -> TypeGuard[ToolCall]:
    """Return whether ``block`` is a ``ToolCall``.

    Type guard that looks only at the block's ``type`` discriminator.
    """
    block_type = block.get("type")
    return block_type == "tool_call"
972
+
973
+
def is_tool_call_chunk(block: ContentBlock) -> TypeGuard[ToolCallChunk]:
    """Return whether ``block`` is a ``ToolCallChunk``.

    Type guard that looks only at the block's ``type`` discriminator.
    """
    block_type = block.get("type")
    return block_type == "tool_call_chunk"
977
+
978
+
def is_text_block(block: ContentBlock) -> TypeGuard[TextContentBlock]:
    """Return whether ``block`` is a ``TextContentBlock``.

    Type guard that looks only at the block's ``type`` discriminator.
    """
    block_type = block.get("type")
    return block_type == "text"
982
+
983
+
def is_reasoning_block(block: ContentBlock) -> TypeGuard[ReasoningContentBlock]:
    """Return whether ``block`` is a ``ReasoningContentBlock``.

    Type guard that looks only at the block's ``type`` discriminator.
    """
    block_type = block.get("type")
    return block_type == "reasoning"
987
+
988
+
def is_invalid_tool_call_block(
    block: ContentBlock,
) -> TypeGuard[InvalidToolCall]:
    """Return whether ``block`` is an ``InvalidToolCall``.

    Type guard that looks only at the block's ``type`` discriminator.
    """
    block_type = block.get("type")
    return block_type == "invalid_tool_call"
912
994
 
913
995
 
914
996
  def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
915
- """Convert image content block to format expected by OpenAI Chat Completions API."""
997
+ """Convert ``ImageContentBlock`` to format expected by OpenAI Chat Completions."""
916
998
  if "url" in block:
917
999
  return {
918
1000
  "type": "image_url",
@@ -943,12 +1025,17 @@ def convert_to_openai_data_block(block: dict) -> dict:
943
1025
 
944
1026
  elif block["type"] == "file":
945
1027
  if "base64" in block or block.get("source_type") == "base64":
1028
+ # Handle v0 format: {"source_type": "base64", "data": "...", ...}
1029
+ # Handle v1 format: {"base64": "...", ...}
946
1030
  base64_data = block["data"] if "source_type" in block else block["base64"]
947
1031
  file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"}
948
1032
  if filename := block.get("filename"):
949
1033
  file["filename"] = filename
950
- elif (metadata := block.get("metadata")) and ("filename" in metadata):
951
- file["filename"] = metadata["filename"]
1034
+ elif (extras := block.get("extras")) and ("filename" in extras):
1035
+ file["filename"] = extras["filename"]
1036
+ elif (extras := block.get("metadata")) and ("filename" in extras):
1037
+ # Backward compat
1038
+ file["filename"] = extras["filename"]
952
1039
  else:
953
1040
  warnings.warn(
954
1041
  "OpenAI may require a filename for file inputs. Specify a filename "
@@ -958,6 +1045,8 @@ def convert_to_openai_data_block(block: dict) -> dict:
958
1045
  )
959
1046
  formatted_block = {"type": "file", "file": file}
960
1047
  elif "file_id" in block or block.get("source_type") == "id":
1048
+ # Handle v0 format: {"source_type": "id", "id": "...", ...}
1049
+ # Handle v1 format: {"file_id": "...", ...}
961
1050
  file_id = block["id"] if "source_type" in block else block["file_id"]
962
1051
  formatted_block = {"type": "file", "file": {"file_id": file_id}}
963
1052
  else:
@@ -966,6 +1055,8 @@ def convert_to_openai_data_block(block: dict) -> dict:
966
1055
 
967
1056
  elif block["type"] == "audio":
968
1057
  if "base64" in block or block.get("source_type") == "base64":
1058
+ # Handle v0 format: {"source_type": "base64", "data": "...", ...}
1059
+ # Handle v1 format: {"base64": "...", ...}
969
1060
  base64_data = block["data"] if "source_type" in block else block["base64"]
970
1061
  audio_format = block["mime_type"].split("/")[-1]
971
1062
  formatted_block = {
@@ -987,14 +1078,15 @@ def create_text_block(
987
1078
  *,
988
1079
  id: Optional[str] = None,
989
1080
  annotations: Optional[list[Annotation]] = None,
990
- index: Optional[int] = None,
1081
+ index: Optional[Union[int, str]] = None,
1082
+ **kwargs: Any,
991
1083
  ) -> TextContentBlock:
992
1084
  """Create a ``TextContentBlock``.
993
1085
 
994
1086
  Args:
995
1087
  text: The text content of the block.
996
1088
  id: Content block identifier. Generated automatically if not provided.
997
- annotations: Citations and other annotations for the text.
1089
+ annotations: ``Citation``s and other annotations for the text.
998
1090
  index: Index of block in aggregate response. Used during streaming.
999
1091
 
1000
1092
  Returns:
@@ -1008,12 +1100,17 @@ def create_text_block(
1008
1100
  block = TextContentBlock(
1009
1101
  type="text",
1010
1102
  text=text,
1011
- id=_ensure_id(id),
1103
+ id=ensure_id(id),
1012
1104
  )
1013
1105
  if annotations is not None:
1014
1106
  block["annotations"] = annotations
1015
1107
  if index is not None:
1016
1108
  block["index"] = index
1109
+
1110
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1111
+ if extras:
1112
+ block["extras"] = extras
1113
+
1017
1114
  return block
1018
1115
 
1019
1116
 
@@ -1024,7 +1121,8 @@ def create_image_block(
1024
1121
  file_id: Optional[str] = None,
1025
1122
  mime_type: Optional[str] = None,
1026
1123
  id: Optional[str] = None,
1027
- index: Optional[int] = None,
1124
+ index: Optional[Union[int, str]] = None,
1125
+ **kwargs: Any,
1028
1126
  ) -> ImageContentBlock:
1029
1127
  """Create an ``ImageContentBlock``.
1030
1128
 
@@ -1052,11 +1150,7 @@ def create_image_block(
1052
1150
  msg = "Must provide one of: url, base64, or file_id"
1053
1151
  raise ValueError(msg)
1054
1152
 
1055
- if base64 and not mime_type:
1056
- msg = "mime_type is required when using base64 data"
1057
- raise ValueError(msg)
1058
-
1059
- block = ImageContentBlock(type="image", id=_ensure_id(id))
1153
+ block = ImageContentBlock(type="image", id=ensure_id(id))
1060
1154
 
1061
1155
  if url is not None:
1062
1156
  block["url"] = url
@@ -1069,6 +1163,10 @@ def create_image_block(
1069
1163
  if index is not None:
1070
1164
  block["index"] = index
1071
1165
 
1166
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1167
+ if extras:
1168
+ block["extras"] = extras
1169
+
1072
1170
  return block
1073
1171
 
1074
1172
 
@@ -1079,7 +1177,8 @@ def create_video_block(
1079
1177
  file_id: Optional[str] = None,
1080
1178
  mime_type: Optional[str] = None,
1081
1179
  id: Optional[str] = None,
1082
- index: Optional[int] = None,
1180
+ index: Optional[Union[int, str]] = None,
1181
+ **kwargs: Any,
1083
1182
  ) -> VideoContentBlock:
1084
1183
  """Create a ``VideoContentBlock``.
1085
1184
 
@@ -1111,7 +1210,7 @@ def create_video_block(
1111
1210
  msg = "mime_type is required when using base64 data"
1112
1211
  raise ValueError(msg)
1113
1212
 
1114
- block = VideoContentBlock(type="video", id=_ensure_id(id))
1213
+ block = VideoContentBlock(type="video", id=ensure_id(id))
1115
1214
 
1116
1215
  if url is not None:
1117
1216
  block["url"] = url
@@ -1124,6 +1223,10 @@ def create_video_block(
1124
1223
  if index is not None:
1125
1224
  block["index"] = index
1126
1225
 
1226
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1227
+ if extras:
1228
+ block["extras"] = extras
1229
+
1127
1230
  return block
1128
1231
 
1129
1232
 
@@ -1134,7 +1237,8 @@ def create_audio_block(
1134
1237
  file_id: Optional[str] = None,
1135
1238
  mime_type: Optional[str] = None,
1136
1239
  id: Optional[str] = None,
1137
- index: Optional[int] = None,
1240
+ index: Optional[Union[int, str]] = None,
1241
+ **kwargs: Any,
1138
1242
  ) -> AudioContentBlock:
1139
1243
  """Create an ``AudioContentBlock``.
1140
1244
 
@@ -1166,7 +1270,7 @@ def create_audio_block(
1166
1270
  msg = "mime_type is required when using base64 data"
1167
1271
  raise ValueError(msg)
1168
1272
 
1169
- block = AudioContentBlock(type="audio", id=_ensure_id(id))
1273
+ block = AudioContentBlock(type="audio", id=ensure_id(id))
1170
1274
 
1171
1275
  if url is not None:
1172
1276
  block["url"] = url
@@ -1179,6 +1283,10 @@ def create_audio_block(
1179
1283
  if index is not None:
1180
1284
  block["index"] = index
1181
1285
 
1286
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1287
+ if extras:
1288
+ block["extras"] = extras
1289
+
1182
1290
  return block
1183
1291
 
1184
1292
 
@@ -1189,7 +1297,8 @@ def create_file_block(
1189
1297
  file_id: Optional[str] = None,
1190
1298
  mime_type: Optional[str] = None,
1191
1299
  id: Optional[str] = None,
1192
- index: Optional[int] = None,
1300
+ index: Optional[Union[int, str]] = None,
1301
+ **kwargs: Any,
1193
1302
  ) -> FileContentBlock:
1194
1303
  """Create a ``FileContentBlock``.
1195
1304
 
@@ -1221,7 +1330,7 @@ def create_file_block(
1221
1330
  msg = "mime_type is required when using base64 data"
1222
1331
  raise ValueError(msg)
1223
1332
 
1224
- block = FileContentBlock(type="file", id=_ensure_id(id))
1333
+ block = FileContentBlock(type="file", id=ensure_id(id))
1225
1334
 
1226
1335
  if url is not None:
1227
1336
  block["url"] = url
@@ -1234,19 +1343,23 @@ def create_file_block(
1234
1343
  if index is not None:
1235
1344
  block["index"] = index
1236
1345
 
1346
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1347
+ if extras:
1348
+ block["extras"] = extras
1349
+
1237
1350
  return block
1238
1351
 
1239
1352
 
1240
1353
  def create_plaintext_block(
1241
- text: str,
1242
- *,
1354
+ text: Optional[str] = None,
1243
1355
  url: Optional[str] = None,
1244
1356
  base64: Optional[str] = None,
1245
1357
  file_id: Optional[str] = None,
1246
1358
  title: Optional[str] = None,
1247
1359
  context: Optional[str] = None,
1248
1360
  id: Optional[str] = None,
1249
- index: Optional[int] = None,
1361
+ index: Optional[Union[int, str]] = None,
1362
+ **kwargs: Any,
1250
1363
  ) -> PlainTextContentBlock:
1251
1364
  """Create a ``PlainTextContentBlock``.
1252
1365
 
@@ -1271,10 +1384,11 @@ def create_plaintext_block(
1271
1384
  block = PlainTextContentBlock(
1272
1385
  type="text-plain",
1273
1386
  mime_type="text/plain",
1274
- text=text,
1275
- id=_ensure_id(id),
1387
+ id=ensure_id(id),
1276
1388
  )
1277
1389
 
1390
+ if text is not None:
1391
+ block["text"] = text
1278
1392
  if url is not None:
1279
1393
  block["url"] = url
1280
1394
  if base64 is not None:
@@ -1288,6 +1402,10 @@ def create_plaintext_block(
1288
1402
  if index is not None:
1289
1403
  block["index"] = index
1290
1404
 
1405
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1406
+ if extras:
1407
+ block["extras"] = extras
1408
+
1291
1409
  return block
1292
1410
 
1293
1411
 
@@ -1296,7 +1414,8 @@ def create_tool_call(
1296
1414
  args: dict[str, Any],
1297
1415
  *,
1298
1416
  id: Optional[str] = None,
1299
- index: Optional[int] = None,
1417
+ index: Optional[Union[int, str]] = None,
1418
+ **kwargs: Any,
1300
1419
  ) -> ToolCall:
1301
1420
  """Create a ``ToolCall``.
1302
1421
 
@@ -1318,19 +1437,24 @@ def create_tool_call(
1318
1437
  type="tool_call",
1319
1438
  name=name,
1320
1439
  args=args,
1321
- id=_ensure_id(id),
1440
+ id=ensure_id(id),
1322
1441
  )
1323
1442
 
1324
1443
  if index is not None:
1325
1444
  block["index"] = index
1326
1445
 
1446
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1447
+ if extras:
1448
+ block["extras"] = extras
1449
+
1327
1450
  return block
1328
1451
 
1329
1452
 
1330
1453
  def create_reasoning_block(
1331
1454
  reasoning: Optional[str] = None,
1332
1455
  id: Optional[str] = None,
1333
- index: Optional[int] = None,
1456
+ index: Optional[Union[int, str]] = None,
1457
+ **kwargs: Any,
1334
1458
  ) -> ReasoningContentBlock:
1335
1459
  """Create a ``ReasoningContentBlock``.
1336
1460
 
@@ -1350,12 +1474,16 @@ def create_reasoning_block(
1350
1474
  block = ReasoningContentBlock(
1351
1475
  type="reasoning",
1352
1476
  reasoning=reasoning or "",
1353
- id=_ensure_id(id),
1477
+ id=ensure_id(id),
1354
1478
  )
1355
1479
 
1356
1480
  if index is not None:
1357
1481
  block["index"] = index
1358
1482
 
1483
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1484
+ if extras:
1485
+ block["extras"] = extras
1486
+
1359
1487
  return block
1360
1488
 
1361
1489
 
@@ -1367,6 +1495,7 @@ def create_citation(
1367
1495
  end_index: Optional[int] = None,
1368
1496
  cited_text: Optional[str] = None,
1369
1497
  id: Optional[str] = None,
1498
+ **kwargs: Any,
1370
1499
  ) -> Citation:
1371
1500
  """Create a ``Citation``.
1372
1501
 
@@ -1386,7 +1515,7 @@ def create_citation(
1386
1515
  prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1387
1516
 
1388
1517
  """
1389
- block = Citation(type="citation", id=_ensure_id(id))
1518
+ block = Citation(type="citation", id=ensure_id(id))
1390
1519
 
1391
1520
  if url is not None:
1392
1521
  block["url"] = url
@@ -1399,6 +1528,10 @@ def create_citation(
1399
1528
  if cited_text is not None:
1400
1529
  block["cited_text"] = cited_text
1401
1530
 
1531
+ extras = {k: v for k, v in kwargs.items() if v is not None}
1532
+ if extras:
1533
+ block["extras"] = extras
1534
+
1402
1535
  return block
1403
1536
 
1404
1537
 
@@ -1406,7 +1539,7 @@ def create_non_standard_block(
1406
1539
  value: dict[str, Any],
1407
1540
  *,
1408
1541
  id: Optional[str] = None,
1409
- index: Optional[int] = None,
1542
+ index: Optional[Union[int, str]] = None,
1410
1543
  ) -> NonStandardContentBlock:
1411
1544
  """Create a ``NonStandardContentBlock``.
1412
1545
 
@@ -1426,7 +1559,7 @@ def create_non_standard_block(
1426
1559
  block = NonStandardContentBlock(
1427
1560
  type="non_standard",
1428
1561
  value=value,
1429
- id=_ensure_id(id),
1562
+ id=ensure_id(id),
1430
1563
  )
1431
1564
 
1432
1565
  if index is not None: