langchain-core 0.3.72__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. langchain_core/_api/beta_decorator.py +1 -0
  2. langchain_core/_api/deprecation.py +2 -0
  3. langchain_core/beta/runnables/context.py +1 -0
  4. langchain_core/callbacks/base.py +23 -14
  5. langchain_core/callbacks/file.py +1 -0
  6. langchain_core/callbacks/manager.py +145 -19
  7. langchain_core/callbacks/streaming_stdout.py +4 -3
  8. langchain_core/callbacks/usage.py +15 -3
  9. langchain_core/chat_history.py +1 -0
  10. langchain_core/document_loaders/langsmith.py +2 -1
  11. langchain_core/documents/base.py +2 -0
  12. langchain_core/embeddings/fake.py +2 -0
  13. langchain_core/indexing/api.py +10 -0
  14. langchain_core/language_models/_utils.py +37 -0
  15. langchain_core/language_models/base.py +4 -1
  16. langchain_core/language_models/chat_models.py +48 -27
  17. langchain_core/language_models/fake_chat_models.py +71 -1
  18. langchain_core/language_models/llms.py +1 -0
  19. langchain_core/memory.py +1 -0
  20. langchain_core/messages/__init__.py +54 -0
  21. langchain_core/messages/ai.py +31 -18
  22. langchain_core/messages/content_blocks.py +1349 -69
  23. langchain_core/messages/human.py +1 -0
  24. langchain_core/messages/modifier.py +1 -1
  25. langchain_core/messages/tool.py +8 -83
  26. langchain_core/messages/utils.py +221 -6
  27. langchain_core/output_parsers/base.py +51 -14
  28. langchain_core/output_parsers/json.py +5 -2
  29. langchain_core/output_parsers/list.py +7 -2
  30. langchain_core/output_parsers/openai_functions.py +29 -5
  31. langchain_core/output_parsers/openai_tools.py +90 -47
  32. langchain_core/output_parsers/pydantic.py +3 -2
  33. langchain_core/output_parsers/transform.py +53 -12
  34. langchain_core/output_parsers/xml.py +14 -5
  35. langchain_core/outputs/llm_result.py +4 -1
  36. langchain_core/prompt_values.py +111 -7
  37. langchain_core/prompts/base.py +4 -0
  38. langchain_core/prompts/chat.py +3 -0
  39. langchain_core/prompts/few_shot.py +1 -0
  40. langchain_core/prompts/few_shot_with_templates.py +1 -0
  41. langchain_core/prompts/image.py +1 -0
  42. langchain_core/prompts/pipeline.py +1 -0
  43. langchain_core/prompts/prompt.py +1 -0
  44. langchain_core/prompts/structured.py +1 -0
  45. langchain_core/rate_limiters.py +1 -0
  46. langchain_core/retrievers.py +3 -0
  47. langchain_core/runnables/base.py +75 -57
  48. langchain_core/runnables/branch.py +1 -0
  49. langchain_core/runnables/config.py +2 -2
  50. langchain_core/runnables/configurable.py +2 -1
  51. langchain_core/runnables/fallbacks.py +3 -7
  52. langchain_core/runnables/graph.py +5 -3
  53. langchain_core/runnables/graph_ascii.py +1 -0
  54. langchain_core/runnables/graph_mermaid.py +1 -0
  55. langchain_core/runnables/history.py +1 -0
  56. langchain_core/runnables/passthrough.py +3 -0
  57. langchain_core/runnables/retry.py +1 -0
  58. langchain_core/runnables/router.py +1 -0
  59. langchain_core/runnables/schema.py +1 -0
  60. langchain_core/stores.py +3 -0
  61. langchain_core/tools/base.py +43 -11
  62. langchain_core/tools/convert.py +25 -3
  63. langchain_core/tools/retriever.py +8 -1
  64. langchain_core/tools/structured.py +10 -1
  65. langchain_core/tracers/base.py +14 -7
  66. langchain_core/tracers/context.py +1 -1
  67. langchain_core/tracers/core.py +27 -4
  68. langchain_core/tracers/event_stream.py +14 -3
  69. langchain_core/tracers/langchain.py +14 -3
  70. langchain_core/tracers/log_stream.py +4 -1
  71. langchain_core/utils/aiter.py +5 -0
  72. langchain_core/utils/function_calling.py +2 -1
  73. langchain_core/utils/iter.py +1 -0
  74. langchain_core/v1/__init__.py +1 -0
  75. langchain_core/v1/chat_models.py +1047 -0
  76. langchain_core/v1/messages.py +755 -0
  77. langchain_core/vectorstores/base.py +1 -0
  78. langchain_core/version.py +1 -1
  79. {langchain_core-0.3.72.dist-info → langchain_core-0.4.0.dev0.dist-info}/METADATA +1 -1
  80. {langchain_core-0.3.72.dist-info → langchain_core-0.4.0.dev0.dist-info}/RECORD +82 -79
  81. {langchain_core-0.3.72.dist-info → langchain_core-0.4.0.dev0.dist-info}/WHEEL +0 -0
  82. {langchain_core-0.3.72.dist-info → langchain_core-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -1,110 +1,935 @@
1
- """Types for content blocks."""
1
+ """Standard, multimodal content blocks for Large Language Model I/O.
2
+
3
+ .. warning::
4
+ This module is under active development. The API is unstable and subject to
5
+ change in future releases.
6
+
7
+ This module provides a standardized data structure for representing inputs to and
8
+ outputs from Large Language Models. The core abstraction is the **Content Block**, a
9
+ ``TypedDict`` that can represent a piece of text, an image, a tool call, or other
10
+ structured data.
11
+
12
+ Data **not yet mapped** to a standard block may be represented using the
13
+ ``NonStandardContentBlock``, which allows for provider-specific data to be included
14
+ without losing the benefits of type checking and validation.
15
+
16
+ Furthermore, provider-specific fields **within** a standard block are fully supported
17
+ by default. However, since current type checkers do not recognize this, we are temporarily
18
+ applying type ignore comments to suppress warnings. In the future,
19
+ `PEP 728 <https://peps.python.org/pep-0728/>`__ will add an extra param, ``extra_items=Any``.
20
+ When this is supported, we will apply it to block signatures to signify to type checkers
21
+ that additional provider-specific fields are allowed.
22
+
23
+ **Example with PEP 728 provider-specific fields:**
24
+
25
+ .. code-block:: python
26
+
27
+ # Note `extra_items=Any`
28
+ class TextContentBlock(TypedDict, extra_items=Any):
29
+ type: Literal["text"]
30
+ id: NotRequired[str]
31
+ text: str
32
+ annotations: NotRequired[list[Annotation]]
33
+ index: NotRequired[int]
34
+
35
+ .. code-block:: python
36
+
37
+ from langchain_core.messages.content_blocks import TextContentBlock
38
+
39
+ my_block: TextContentBlock = {
40
+ # Add required fields
41
+ "type": "text",
42
+ "text": "Hello, world!",
43
+ # Additional fields not specified in the TypedDict
44
+ # These are valid with PEP 728 and are typed as Any
45
+ "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
46
+ "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
47
+ "custom_field": "any value",
48
+ }
49
+
50
+ openai_data = my_block["openai_metadata"] # Type: Any
51
+
52
+ .. note::
53
+ PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings
54
+ from type checkers that don't yet support it. The functionality works correctly
55
+ in Python 3.13+ and will be fully supported as the ecosystem catches up.
56
+
57
+ **Rationale**
58
+
59
+ Different LLM providers use distinct and incompatible API schemas. This module
60
+ introduces a unified, provider-agnostic format to standardize these interactions. A
61
+ message to or from a model is simply a `list` of `ContentBlock` objects, allowing for
62
+ the natural interleaving of text, images, and other content in a single, ordered
63
+ sequence.
64
+
65
+ An adapter for a specific provider is responsible for translating this standard list of
66
+ blocks into the format required by its API.
67
+
68
+ **Key Block Types**
69
+
70
+ The module defines several types of content blocks, including:
71
+
72
+ - ``TextContentBlock``: Standard text.
73
+ - ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data.
74
+ - ``ToolCall``, ``ToolCallChunk``, ``InvalidToolCall``: For function calling.
75
+ - ``ReasoningContentBlock``: To capture a model's thought process.
76
+ - ``Citation``: For annotations that link generated text to a source document.
77
+
78
+ **Example Usage**
79
+
80
+ .. code-block:: python
81
+
82
+ # Direct construction:
83
+ from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock
84
+
85
+ multimodal_message: list[ContentBlock] = [
86
+ TextContentBlock(type="text", text="What is shown in this image?"),
87
+ ImageContentBlock(
88
+ type="image",
89
+ url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
90
+ mime_type="image/png",
91
+ ),
92
+ ]
93
+
94
+ from langchain_core.messages.content_blocks import create_text_block, create_image_block
95
+
96
+ # Using factory functions:
97
+ multimodal_message: list[ContentBlock] = [
98
+ create_text_block("What is shown in this image?"),
99
+ create_image_block(
100
+ url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
101
+ mime_type="image/png",
102
+ ),
103
+ ]
104
+ """ # noqa: E501
2
105
 
3
106
  import warnings
4
- from typing import Any, Literal, Union
107
+ from typing import Any, Literal, Optional, Union
108
+ from uuid import uuid4
109
+
110
+ from typing_extensions import NotRequired, TypedDict, get_args, get_origin
111
+
112
+
113
+ def _ensure_id(id_val: Optional[str]) -> str:
114
+ """Ensure the ID is a valid string, generating a new UUID if not provided.
115
+
116
+ Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
117
+ LangChain-generated IDs.
118
+
119
+ Args:
120
+ id_val: Optional string ID value to validate.
121
+
122
+ Returns:
123
+ A valid string ID, either the provided value or a new UUID.
124
+ """
125
+ return id_val or str(f"lc_{uuid4()}")
126
+
127
+
128
+ class Citation(TypedDict):
129
+ """Annotation for citing data from a document.
130
+
131
+ .. note::
132
+ ``start/end`` indices refer to the **response text**,
133
+ not the source text. This means that the indices are relative to the model's
134
+ response, not the original document (as specified in the ``url``).
135
+
136
+ .. note::
137
+ ``create_citation`` may also be used as a factory to create a ``Citation``.
138
+ Benefits include:
139
+
140
+ * Automatic ID generation (when not provided)
141
+ * Required arguments strictly validated at creation time
142
+
143
+ """
144
+
145
+ type: Literal["citation"]
146
+ """Type of the content block. Used for discrimination."""
147
+
148
+ id: NotRequired[str]
149
+ """Content block identifier. Either:
150
+
151
+ - Generated by the provider (e.g., OpenAI's file ID)
152
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
153
+ """
154
+
155
+ url: NotRequired[str]
156
+ """URL of the document source."""
157
+
158
+ # For future consideration, if needed:
159
+ # provenance: NotRequired[str]
160
+ # """Provenance of the document, e.g., "Wikipedia", "arXiv", etc.
161
+
162
+ # Included for future compatibility; not currently implemented.
163
+ # """
164
+
165
+ title: NotRequired[str]
166
+ """Source document title.
167
+
168
+ For example, the page title for a web page or the title of a paper.
169
+ """
170
+
171
+ start_index: NotRequired[int]
172
+ """Start index of the **response text** (``TextContentBlock.text``) for which the
173
+ annotation applies."""
174
+
175
+ end_index: NotRequired[int]
176
+ """End index of the **response text** (``TextContentBlock.text``) for which the
177
+ annotation applies."""
178
+
179
+ cited_text: NotRequired[str]
180
+ """Excerpt of source text being cited."""
181
+
182
+ # NOTE: not including spans for the raw document text (such as `text_start_index`
183
+ # and `text_end_index`) as this is not currently supported by any provider. The
184
+ # thinking is that the `cited_text` should be sufficient for most use cases, and it
185
+ # is difficult to reliably extract spans from the raw document text across file
186
+ # formats or encoding schemes.
187
+
188
+ extras: NotRequired[dict[str, Any]]
189
+ """Provider-specific metadata."""
190
+
191
+
192
+ class NonStandardAnnotation(TypedDict):
193
+ """Provider-specific annotation format."""
194
+
195
+ type: Literal["non_standard_annotation"]
196
+ """Type of the content block. Used for discrimination."""
197
+
198
+ id: NotRequired[str]
199
+ """Content block identifier. Either:
200
+
201
+ - Generated by the provider (e.g., OpenAI's file ID)
202
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
203
+ """
204
+
205
+ value: dict[str, Any]
206
+ """Provider-specific annotation data."""
207
+
208
+
209
+ Annotation = Union[Citation, NonStandardAnnotation]
210
+
211
+
212
+ class TextContentBlock(TypedDict):
213
+ """Text output from an LLM.
214
+
215
+ This typically represents the main text content of a message, such as the response
216
+ from a language model or the text of a user message.
217
+
218
+ .. note::
219
+ ``create_text_block`` may also be used as a factory to create a
220
+ ``TextContentBlock``. Benefits include:
221
+
222
+ * Automatic ID generation (when not provided)
223
+ * Required arguments strictly validated at creation time
224
+
225
+ """
226
+
227
+ type: Literal["text"]
228
+ """Type of the content block. Used for discrimination."""
229
+
230
+ id: NotRequired[str]
231
+ """Content block identifier. Either:
232
+
233
+ - Generated by the provider (e.g., OpenAI's file ID)
234
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
235
+ """
236
+
237
+ text: str
238
+ """Block text."""
239
+
240
+ annotations: NotRequired[list[Annotation]]
241
+ """Citations and other annotations."""
242
+
243
+ index: NotRequired[int]
244
+ """Index of block in aggregate response. Used during streaming."""
245
+
246
+ extras: NotRequired[dict[str, Any]]
247
+ """Provider-specific metadata."""
248
+
249
+
250
+ class ToolCall(TypedDict):
251
+ """Represents a request to call a tool.
252
+
253
+ Example:
254
+
255
+ .. code-block:: python
256
+
257
+ {
258
+ "name": "foo",
259
+ "args": {"a": 1},
260
+ "id": "123"
261
+ }
262
+
263
+ This represents a request to call the tool named "foo" with arguments {"a": 1}
264
+ and an identifier of "123".
265
+
266
+ .. note::
267
+ ``create_tool_call`` may also be used as a factory to create a
268
+ ``ToolCall``. Benefits include:
269
+
270
+ * Automatic ID generation (when not provided)
271
+ * Required arguments strictly validated at creation time
272
+
273
+ """
274
+
275
+ type: Literal["tool_call"]
276
+ """Used for discrimination."""
277
+
278
+ id: Optional[str]
279
+ """An identifier associated with the tool call.
280
+
281
+ An identifier is needed to associate a tool call request with a tool
282
+ call result in events when multiple concurrent tool calls are made.
283
+ """
284
+ # TODO: Consider making this NotRequired[str] in the future.
285
+
286
+ name: str
287
+ """The name of the tool to be called."""
288
+
289
+ args: dict[str, Any]
290
+ """The arguments to the tool call."""
291
+
292
+ index: NotRequired[int]
293
+ """Index of block in aggregate response. Used during streaming."""
294
+
295
+ extras: NotRequired[dict[str, Any]]
296
+ """Provider-specific metadata."""
297
+
298
+
299
+ class ToolCallChunk(TypedDict):
300
+ """A chunk of a tool call (e.g., as part of a stream).
301
+
302
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
303
+ all string attributes are concatenated. Chunks are only merged if their
304
+ values of ``index`` are equal and not ``None``.
305
+
306
+ Example:
307
+
308
+ .. code-block:: python
309
+
310
+ left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)]
311
+ right_chunks = [ToolCallChunk(name=None, args='1}', index=0)]
312
+
313
+ (
314
+ AIMessageChunk(content="", tool_call_chunks=left_chunks)
315
+ + AIMessageChunk(content="", tool_call_chunks=right_chunks)
316
+ ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)]
317
+ """
5
318
 
6
- from pydantic import TypeAdapter, ValidationError
7
- from typing_extensions import NotRequired, TypedDict
319
+ # TODO: Consider making fields NotRequired[str] in the future.
8
320
 
321
+ type: NotRequired[Literal["tool_call_chunk"]]
322
+ """Used for serialization."""
9
323
 
10
- class BaseDataContentBlock(TypedDict, total=False):
11
- """Base class for data content blocks."""
324
+ id: Optional[str]
325
+ """An identifier associated with the tool call."""
326
+
327
+ name: Optional[str]
328
+ """The name of the tool to be called."""
329
+
330
+ args: Optional[str]
331
+ """The arguments to the tool call."""
332
+
333
+ index: Optional[int]
334
+ """The index of the tool call in a sequence."""
335
+
336
+ extras: NotRequired[dict[str, Any]]
337
+ """Provider-specific metadata."""
338
+
339
+
340
+ class InvalidToolCall(TypedDict):
341
+ """Allowance for errors made by LLM.
342
+
343
+ Here we add an ``error`` key to surface errors made during generation
344
+ (e.g., invalid JSON arguments).
345
+ """
346
+
347
+ # TODO: Consider making fields NotRequired[str] in the future.
348
+
349
+ type: Literal["invalid_tool_call"]
350
+ """Used for discrimination."""
351
+
352
+ id: Optional[str]
353
+ """An identifier associated with the tool call."""
354
+
355
+ name: Optional[str]
356
+ """The name of the tool to be called."""
357
+
358
+ args: Optional[str]
359
+ """The arguments to the tool call."""
360
+
361
+ error: Optional[str]
362
+ """An error message associated with the tool call."""
363
+
364
+ index: NotRequired[int]
365
+ """Index of block in aggregate response. Used during streaming."""
366
+
367
+ extras: NotRequired[dict[str, Any]]
368
+ """Provider-specific metadata."""
369
+
370
+
371
+ # Note: These are not standard tool calls, but rather provider-specific built-in tools.
372
+ # Web search
373
+ class WebSearchCall(TypedDict):
374
+ """Built-in web search tool call."""
375
+
376
+ type: Literal["web_search_call"]
377
+ """Type of the content block. Used for discrimination."""
378
+
379
+ id: NotRequired[str]
380
+ """Content block identifier. Either:
381
+
382
+ - Generated by the provider (e.g., OpenAI's file ID)
383
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
384
+ """
385
+
386
+ query: NotRequired[str]
387
+ """The search query used in the web search tool call."""
388
+
389
+ index: NotRequired[int]
390
+ """Index of block in aggregate response. Used during streaming."""
391
+
392
+ extras: NotRequired[dict[str, Any]]
393
+ """Provider-specific metadata."""
394
+
395
+
396
+ class WebSearchResult(TypedDict):
397
+ """Result of a built-in web search tool call."""
398
+
399
+ type: Literal["web_search_result"]
400
+ """Type of the content block. Used for discrimination."""
401
+
402
+ id: NotRequired[str]
403
+ """Content block identifier. Either:
404
+
405
+ - Generated by the provider (e.g., OpenAI's file ID)
406
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
407
+ """
408
+
409
+ urls: NotRequired[list[str]]
410
+ """List of URLs returned by the web search tool call."""
411
+
412
+ index: NotRequired[int]
413
+ """Index of block in aggregate response. Used during streaming."""
414
+
415
+ extras: NotRequired[dict[str, Any]]
416
+ """Provider-specific metadata."""
417
+
418
+
419
+ class CodeInterpreterCall(TypedDict):
420
+ """Built-in code interpreter tool call."""
421
+
422
+ type: Literal["code_interpreter_call"]
423
+ """Type of the content block. Used for discrimination."""
424
+
425
+ id: NotRequired[str]
426
+ """Content block identifier. Either:
427
+
428
+ - Generated by the provider (e.g., OpenAI's file ID)
429
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
430
+ """
431
+
432
+ language: NotRequired[str]
433
+ """The name of the programming language used in the code interpreter tool call."""
434
+
435
+ code: NotRequired[str]
436
+ """The code to be executed by the code interpreter."""
437
+
438
+ index: NotRequired[int]
439
+ """Index of block in aggregate response. Used during streaming."""
440
+
441
+ extras: NotRequired[dict[str, Any]]
442
+ """Provider-specific metadata."""
443
+
444
+
445
+ class CodeInterpreterOutput(TypedDict):
446
+ """Output of a single code interpreter tool call.
447
+
448
+ Full output of a code interpreter tool call is represented by
449
+ ``CodeInterpreterResult``, which contains a list of these blocks in its ``output`` field.
450
+ """
451
+
452
+ type: Literal["code_interpreter_output"]
453
+ """Type of the content block. Used for discrimination."""
454
+
455
+ id: NotRequired[str]
456
+ """Content block identifier. Either:
457
+
458
+ - Generated by the provider (e.g., OpenAI's file ID)
459
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
460
+ """
461
+
462
+ return_code: NotRequired[int]
463
+ """Return code of the executed code.
464
+
465
+ Example: ``0`` for success, non-zero for failure.
466
+ """
467
+
468
+ stderr: NotRequired[str]
469
+ """Standard error output of the executed code."""
470
+
471
+ stdout: NotRequired[str]
472
+ """Standard output of the executed code."""
473
+
474
+ file_ids: NotRequired[list[str]]
475
+ """List of file IDs generated by the code interpreter."""
476
+
477
+ index: NotRequired[int]
478
+ """Index of block in aggregate response. Used during streaming."""
479
+
480
+ extras: NotRequired[dict[str, Any]]
481
+ """Provider-specific metadata."""
482
+
483
+
484
+ class CodeInterpreterResult(TypedDict):
485
+ """Result of a code interpreter tool call."""
486
+
487
+ type: Literal["code_interpreter_result"]
488
+ """Type of the content block. Used for discrimination."""
489
+
490
+ id: NotRequired[str]
491
+ """Content block identifier. Either:
492
+
493
+ - Generated by the provider (e.g., OpenAI's file ID)
494
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
495
+ """
496
+
497
+ output: list[CodeInterpreterOutput]
498
+ """List of outputs from the code interpreter tool call."""
499
+
500
+ index: NotRequired[int]
501
+ """Index of block in aggregate response. Used during streaming."""
502
+
503
+ extras: NotRequired[dict[str, Any]]
504
+ """Provider-specific metadata."""
505
+
506
+
507
+ class ReasoningContentBlock(TypedDict):
508
+ """Reasoning output from an LLM.
509
+
510
+ .. note::
511
+ ``create_reasoning_block`` may also be used as a factory to create a
512
+ ``ReasoningContentBlock``. Benefits include:
513
+
514
+ * Automatic ID generation (when not provided)
515
+ * Required arguments strictly validated at creation time
516
+
517
+ """
518
+
519
+ type: Literal["reasoning"]
520
+ """Type of the content block. Used for discrimination."""
521
+
522
+ id: NotRequired[str]
523
+ """Content block identifier. Either:
524
+
525
+ - Generated by the provider (e.g., OpenAI's file ID)
526
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
527
+ """
528
+
529
+ reasoning: NotRequired[str]
530
+ """Reasoning text.
531
+
532
+ Either the thought summary or the raw reasoning text itself. This is often parsed
533
+ from ``<think>`` tags in the model's response.
534
+ """
535
+
536
+ index: NotRequired[int]
537
+ """Index of block in aggregate response. Used during streaming."""
538
+
539
+ extras: NotRequired[dict[str, Any]]
540
+ """Provider-specific metadata."""
541
+
542
+
543
+ # Note: `title` and `context` are fields that could be used to provide additional
544
+ # information about the file, such as a description or summary of its content.
545
+ # E.g. with Claude, you can provide a context for a file which is passed to the model.
546
+ class ImageContentBlock(TypedDict):
547
+ """Image data.
548
+
549
+ .. note::
550
+ ``create_image_block`` may also be used as a factory to create a
551
+ ``ImageContentBlock``. Benefits include:
552
+
553
+ * Automatic ID generation (when not provided)
554
+ * Required arguments strictly validated at creation time
555
+
556
+ """
557
+
558
+ type: Literal["image"]
559
+ """Type of the content block. Used for discrimination."""
560
+
561
+ id: NotRequired[str]
562
+ """Content block identifier. Either:
563
+
564
+ - Generated by the provider (e.g., OpenAI's file ID)
565
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
566
+ """
567
+
568
+ file_id: NotRequired[str]
569
+ """ID of the image file, e.g., from a file storage system."""
12
570
 
13
571
  mime_type: NotRequired[str]
14
- """MIME type of the content block (if needed)."""
572
+ """MIME type of the image. Required for base64.
573
+
574
+ `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#image>`__
575
+ """
15
576
 
577
+ index: NotRequired[int]
578
+ """Index of block in aggregate response. Used during streaming."""
579
+
580
+ url: NotRequired[str]
581
+ """URL of the image."""
582
+
583
+ base64: NotRequired[str]
584
+ """Data as a base64 string."""
16
585
 
17
- class URLContentBlock(BaseDataContentBlock):
18
- """Content block for data from a URL."""
586
+ extras: NotRequired[dict[str, Any]]
587
+ """Provider-specific metadata."""
19
588
 
20
- type: Literal["image", "audio", "file"]
21
- """Type of the content block."""
22
- source_type: Literal["url"]
23
- """Source type (url)."""
24
- url: str
25
- """URL for data."""
26
589
 
590
+ class VideoContentBlock(TypedDict):
591
+ """Video data.
27
592
 
28
- class Base64ContentBlock(BaseDataContentBlock):
29
- """Content block for inline data from a base64 string."""
593
+ .. note::
594
+ ``create_video_block`` may also be used as a factory to create a
595
+ ``VideoContentBlock``. Benefits include:
30
596
 
31
- type: Literal["image", "audio", "file"]
32
- """Type of the content block."""
33
- source_type: Literal["base64"]
34
- """Source type (base64)."""
35
- data: str
597
+ * Automatic ID generation (when not provided)
598
+ * Required arguments strictly validated at creation time
599
+
600
+ """
601
+
602
+ type: Literal["video"]
603
+ """Type of the content block. Used for discrimination."""
604
+
605
+ id: NotRequired[str]
606
+ """Content block identifier. Either:
607
+
608
+ - Generated by the provider (e.g., OpenAI's file ID)
609
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
610
+ """
611
+
612
+ file_id: NotRequired[str]
613
+ """ID of the video file, e.g., from a file storage system."""
614
+
615
+ mime_type: NotRequired[str]
616
+ """MIME type of the video. Required for base64.
617
+
618
+ `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#video>`__
619
+ """
620
+
621
+ index: NotRequired[int]
622
+ """Index of block in aggregate response. Used during streaming."""
623
+
624
+ url: NotRequired[str]
625
+ """URL of the video."""
626
+
627
+ base64: NotRequired[str]
36
628
  """Data as a base64 string."""
37
629
 
630
+ extras: NotRequired[dict[str, Any]]
631
+ """Provider-specific metadata."""
632
+
633
+
634
+ class AudioContentBlock(TypedDict):
635
+ """Audio data.
636
+
637
+ .. note::
638
+ ``create_audio_block`` may also be used as a factory to create an
639
+ ``AudioContentBlock``. Benefits include:
640
+
641
+ * Automatic ID generation (when not provided)
642
+ * Required arguments strictly validated at creation time
643
+
644
+ """
645
+
646
+ type: Literal["audio"]
647
+ """Type of the content block. Used for discrimination."""
648
+
649
+ id: NotRequired[str]
650
+ """Content block identifier. Either:
651
+
652
+ - Generated by the provider (e.g., OpenAI's file ID)
653
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
654
+ """
655
+
656
+ file_id: NotRequired[str]
657
+ """ID of the audio file, e.g., from a file storage system."""
658
+
659
+ mime_type: NotRequired[str]
660
+ """MIME type of the audio. Required for base64.
661
+
662
+ `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#audio>`__
663
+ """
664
+
665
+ index: NotRequired[int]
666
+ """Index of block in aggregate response. Used during streaming."""
667
+
668
+ url: NotRequired[str]
669
+ """URL of the audio."""
670
+
671
+ base64: NotRequired[str]
672
+ """Data as a base64 string."""
673
+
674
+ extras: NotRequired[dict[str, Any]]
675
+ """Provider-specific metadata."""
676
+
677
+
678
+ class PlainTextContentBlock(TypedDict):
679
+ """Plaintext data (e.g., from a document).
680
+
681
+ .. note::
682
+ Title and context are optional fields that may be passed to the model. See
683
+ Anthropic `example <https://docs.anthropic.com/en/docs/build-with-claude/citations#citable-vs-non-citable-content>`__.
684
+
685
+ .. note::
686
+ ``create_plaintext_block`` may also be used as a factory to create a
687
+ ``PlainTextContentBlock``. Benefits include:
688
+
689
+ * Automatic ID generation (when not provided)
690
+ * Required arguments strictly validated at creation time
691
+
692
+ """
693
+
694
+ type: Literal["text-plain"]
695
+ """Type of the content block. Used for discrimination."""
696
+
697
+ id: NotRequired[str]
698
+ """Content block identifier. Either:
699
+
700
+ - Generated by the provider (e.g., OpenAI's file ID)
701
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
702
+ """
703
+
704
+ file_id: NotRequired[str]
705
+ """ID of the plaintext file, e.g., from a file storage system."""
706
+
707
+ mime_type: Literal["text/plain"]
708
+ """MIME type of the file. Required for base64."""
709
+
710
+ index: NotRequired[int]
711
+ """Index of block in aggregate response. Used during streaming."""
712
+
713
+ url: NotRequired[str]
714
+ """URL of the plaintext."""
715
+
716
+ base64: NotRequired[str]
717
+ """Data as a base64 string."""
718
+
719
+ text: NotRequired[str]
720
+ """Plaintext content. This is optional if the data is provided as base64."""
721
+
722
+ title: NotRequired[str]
723
+ """Title of the text data, e.g., the title of a document."""
724
+
725
+ context: NotRequired[str]
726
+ """Context for the text, e.g., a description or summary of the text's content."""
727
+
728
+ extras: NotRequired[dict[str, Any]]
729
+ """Provider-specific metadata."""
730
+
731
+
732
+ class FileContentBlock(TypedDict):
733
+ """File data that doesn't fit into other multimodal blocks.
734
+
735
+ This block is intended for files that are not images, audio, video, or plaintext. For
736
+ example, it can be used for PDFs, Word documents, etc.
737
+
738
+ If the file is an image, audio, video, or plaintext, you should use the corresponding
739
+ content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``,
740
+ ``PlainTextContentBlock``).
38
741
 
39
- class PlainTextContentBlock(BaseDataContentBlock):
40
- """Content block for plain text data (e.g., from a document)."""
742
+ .. note::
743
+ ``create_file_block`` may also be used as a factory to create a
744
+ ``FileContentBlock``. Benefits include:
745
+
746
+ * Automatic ID generation (when not provided)
747
+ * Required arguments strictly validated at creation time
748
+
749
+ """
41
750
 
42
751
  type: Literal["file"]
43
- """Type of the content block."""
44
- source_type: Literal["text"]
45
- """Source type (text)."""
46
- text: str
47
- """Text data."""
752
+ """Type of the content block. Used for discrimination."""
753
+
754
+ id: NotRequired[str]
755
+ """Content block identifier. Either:
756
+
757
+ - Generated by the provider (e.g., OpenAI's file ID)
758
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
759
+ """
48
760
 
761
+ file_id: NotRequired[str]
762
+ """ID of the file, e.g., from a file storage system."""
49
763
 
50
- class IDContentBlock(TypedDict):
51
- """Content block for data specified by an identifier."""
764
+ mime_type: NotRequired[str]
765
+ """MIME type of the file. Required for base64.
766
+
767
+ `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
768
+ """
769
+
770
+ index: NotRequired[int]
771
+ """Index of block in aggregate response. Used during streaming."""
772
+
773
+ url: NotRequired[str]
774
+ """URL of the file."""
775
+
776
+ base64: NotRequired[str]
777
+ """Data as a base64 string."""
52
778
 
53
- type: Literal["image", "audio", "file"]
54
- """Type of the content block."""
55
- source_type: Literal["id"]
56
- """Source type (id)."""
57
- id: str
58
- """Identifier for data source."""
779
+ extras: NotRequired[dict[str, Any]]
780
+ """Provider-specific metadata."""
59
781
 
60
782
 
783
+ # Future modalities to consider:
784
+ # - 3D models
785
+ # - Tabular data
786
+
787
+
788
+ class NonStandardContentBlock(TypedDict):
789
+ """Provider-specific data.
790
+
791
+ This block contains data for which there is not yet a standard type.
792
+
793
+ The purpose of this block should be to simply hold a provider-specific payload.
794
+ If a provider's non-standard output includes reasoning and tool calls, it should be
795
+ the adapter's job to parse that payload and emit the corresponding standard
796
+ ``ReasoningContentBlock`` and ``ToolCall`` blocks.
797
+
798
+ .. note::
799
+ ``create_non_standard_block`` may also be used as a factory to create a
800
+ ``NonStandardContentBlock``. Benefits include:
801
+
802
+ * Automatic ID generation (when not provided)
803
+ * Required arguments strictly validated at creation time
804
+
805
+ """
806
+
807
+ type: Literal["non_standard"]
808
+ """Type of the content block. Used for discrimination."""
809
+
810
+ id: NotRequired[str]
811
+ """Content block identifier. Either:
812
+
813
+ - Generated by the provider (e.g., OpenAI's file ID)
814
+ - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
815
+ """
816
+
817
+ value: dict[str, Any]
818
+ """Provider-specific data."""
819
+
820
+ index: NotRequired[int]
821
+ """Index of block in aggregate response. Used during streaming."""
822
+
823
+
824
+ # --- Aliases ---
61
825
  DataContentBlock = Union[
62
- URLContentBlock,
63
- Base64ContentBlock,
826
+ ImageContentBlock,
827
+ VideoContentBlock,
828
+ AudioContentBlock,
64
829
  PlainTextContentBlock,
65
- IDContentBlock,
830
+ FileContentBlock,
66
831
  ]
67
832
 
68
- _DataContentBlockAdapter: TypeAdapter[DataContentBlock] = TypeAdapter(DataContentBlock)
833
+ ToolContentBlock = Union[
834
+ ToolCall,
835
+ CodeInterpreterCall,
836
+ CodeInterpreterOutput,
837
+ CodeInterpreterResult,
838
+ WebSearchCall,
839
+ WebSearchResult,
840
+ ]
841
+
842
+ ContentBlock = Union[
843
+ TextContentBlock,
844
+ ToolCall,
845
+ InvalidToolCall,
846
+ ToolCallChunk,
847
+ ReasoningContentBlock,
848
+ NonStandardContentBlock,
849
+ DataContentBlock,
850
+ ToolContentBlock,
851
+ ]
69
852
 
70
853
 
71
- def is_data_content_block(
72
- content_block: dict,
73
- ) -> bool:
854
+ def _extract_typedict_type_values(union_type: Any) -> set[str]:
855
+ """Extract the values of the 'type' field from a TypedDict union type."""
856
+ result: set[str] = set()
857
+ for value in get_args(union_type):
858
+ annotation = value.__annotations__["type"]
859
+ if get_origin(annotation) is Literal:
860
+ result.update(get_args(annotation))
861
+ else:
862
+ msg = f"{value} 'type' is not a Literal"
863
+ raise ValueError(msg)
864
+ return result
865
+
866
+
867
+ KNOWN_BLOCK_TYPES = {
868
+ "text",
869
+ "text-plain",
870
+ "tool_call",
871
+ "invalid_tool_call",
872
+ "tool_call_chunk",
873
+ "reasoning",
874
+ "non_standard",
875
+ "image",
876
+ "audio",
877
+ "file",
878
+ "video",
879
+ "code_interpreter_call",
880
+ "code_interpreter_output",
881
+ "code_interpreter_result",
882
+ "web_search_call",
883
+ "web_search_result",
884
+ }
885
+
886
+
887
+ def is_data_content_block(block: dict) -> bool:
74
888
  """Check if the content block is a standard data content block.
75
889
 
76
890
  Args:
77
- content_block: The content block to check.
891
+ block: The content block to check.
78
892
 
79
893
  Returns:
80
894
  True if the content block is a data content block, False otherwise.
81
895
  """
82
- try:
83
- _ = _DataContentBlockAdapter.validate_python(content_block)
84
- except ValidationError:
85
- return False
86
- else:
87
- return True
896
+ return block.get("type") in (
897
+ "audio",
898
+ "image",
899
+ "video",
900
+ "file",
901
+ "text-plain",
902
+ ) and any(
903
+ key in block
904
+ for key in (
905
+ "url",
906
+ "base64",
907
+ "file_id",
908
+ "text",
909
+ "source_type", # backwards compatibility
910
+ )
911
+ )
88
912
 
89
913
 
90
- def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
914
+ def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
91
915
  """Convert image content block to format expected by OpenAI Chat Completions API."""
92
- if content_block["source_type"] == "url":
916
+ if "url" in block:
93
917
  return {
94
918
  "type": "image_url",
95
919
  "image_url": {
96
- "url": content_block["url"],
920
+ "url": block["url"],
97
921
  },
98
922
  }
99
- if content_block["source_type"] == "base64":
100
- if "mime_type" not in content_block:
923
+ if "base64" in block or block.get("source_type") == "base64":
924
+ if "mime_type" not in block:
101
925
  error_message = "mime_type key is required for base64 data."
102
926
  raise ValueError(error_message)
103
- mime_type = content_block["mime_type"]
927
+ mime_type = block["mime_type"]
928
+ base64_data = block["data"] if "data" in block else block["base64"]
104
929
  return {
105
930
  "type": "image_url",
106
931
  "image_url": {
107
- "url": f"data:{mime_type};base64,{content_block['data']}",
932
+ "url": f"data:{mime_type};base64,{base64_data}",
108
933
  },
109
934
  }
110
935
  error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
@@ -117,8 +942,9 @@ def convert_to_openai_data_block(block: dict) -> dict:
117
942
  formatted_block = convert_to_openai_image_block(block)
118
943
 
119
944
  elif block["type"] == "file":
120
- if block["source_type"] == "base64":
121
- file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
945
+ if "base64" in block or block.get("source_type") == "base64":
946
+ base64_data = block["data"] if "source_type" in block else block["base64"]
947
+ file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"}
122
948
  if filename := block.get("filename"):
123
949
  file["filename"] = filename
124
950
  elif (metadata := block.get("metadata")) and ("filename" in metadata):
@@ -126,30 +952,484 @@ def convert_to_openai_data_block(block: dict) -> dict:
126
952
  else:
127
953
  warnings.warn(
128
954
  "OpenAI may require a filename for file inputs. Specify a filename "
129
- "in the content block: {'type': 'file', 'source_type': 'base64', "
130
- "'mime_type': 'application/pdf', 'data': '...', "
131
- "'filename': 'my-pdf'}",
955
+ "in the content block: {'type': 'file', 'mime_type': "
956
+ "'application/pdf', 'base64': '...', 'filename': 'my-pdf'}",
132
957
  stacklevel=1,
133
958
  )
134
959
  formatted_block = {"type": "file", "file": file}
135
- elif block["source_type"] == "id":
136
- formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
960
+ elif "file_id" in block or block.get("source_type") == "id":
961
+ file_id = block["id"] if "source_type" in block else block["file_id"]
962
+ formatted_block = {"type": "file", "file": {"file_id": file_id}}
137
963
  else:
138
- error_msg = "source_type base64 or id is required for file blocks."
964
+ error_msg = "Keys base64 or file_id required for file blocks."
139
965
  raise ValueError(error_msg)
140
966
 
141
967
  elif block["type"] == "audio":
142
- if block["source_type"] == "base64":
968
+ if "base64" in block or block.get("source_type") == "base64":
969
+ base64_data = block["data"] if "source_type" in block else block["base64"]
143
970
  audio_format = block["mime_type"].split("/")[-1]
144
971
  formatted_block = {
145
972
  "type": "input_audio",
146
- "input_audio": {"data": block["data"], "format": audio_format},
973
+ "input_audio": {"data": base64_data, "format": audio_format},
147
974
  }
148
975
  else:
149
- error_msg = "source_type base64 is required for audio blocks."
976
+ error_msg = "Key base64 is required for audio blocks."
150
977
  raise ValueError(error_msg)
151
978
  else:
152
979
  error_msg = f"Block of type {block['type']} is not supported."
153
980
  raise ValueError(error_msg)
154
981
 
155
982
  return formatted_block
983
+
984
+
985
+ def create_text_block(
986
+ text: str,
987
+ *,
988
+ id: Optional[str] = None,
989
+ annotations: Optional[list[Annotation]] = None,
990
+ index: Optional[int] = None,
991
+ ) -> TextContentBlock:
992
+ """Create a ``TextContentBlock``.
993
+
994
+ Args:
995
+ text: The text content of the block.
996
+ id: Content block identifier. Generated automatically if not provided.
997
+ annotations: Citations and other annotations for the text.
998
+ index: Index of block in aggregate response. Used during streaming.
999
+
1000
+ Returns:
1001
+ A properly formatted ``TextContentBlock``.
1002
+
1003
+ .. note::
1004
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1005
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1006
+
1007
+ """
1008
+ block = TextContentBlock(
1009
+ type="text",
1010
+ text=text,
1011
+ id=_ensure_id(id),
1012
+ )
1013
+ if annotations is not None:
1014
+ block["annotations"] = annotations
1015
+ if index is not None:
1016
+ block["index"] = index
1017
+ return block
1018
+
1019
+
1020
+ def create_image_block(
1021
+ *,
1022
+ url: Optional[str] = None,
1023
+ base64: Optional[str] = None,
1024
+ file_id: Optional[str] = None,
1025
+ mime_type: Optional[str] = None,
1026
+ id: Optional[str] = None,
1027
+ index: Optional[int] = None,
1028
+ ) -> ImageContentBlock:
1029
+ """Create an ``ImageContentBlock``.
1030
+
1031
+ Args:
1032
+ url: URL of the image.
1033
+ base64: Base64-encoded image data.
1034
+ file_id: ID of the image file from a file storage system.
1035
+ mime_type: MIME type of the image. Required for base64 data.
1036
+ id: Content block identifier. Generated automatically if not provided.
1037
+ index: Index of block in aggregate response. Used during streaming.
1038
+
1039
+ Returns:
1040
+ A properly formatted ``ImageContentBlock``.
1041
+
1042
+ Raises:
1043
+ ValueError: If no image source is provided or if ``base64`` is used without
1044
+ ``mime_type``.
1045
+
1046
+ .. note::
1047
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1048
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1049
+
1050
+ """
1051
+ if not any([url, base64, file_id]):
1052
+ msg = "Must provide one of: url, base64, or file_id"
1053
+ raise ValueError(msg)
1054
+
1055
+ if base64 and not mime_type:
1056
+ msg = "mime_type is required when using base64 data"
1057
+ raise ValueError(msg)
1058
+
1059
+ block = ImageContentBlock(type="image", id=_ensure_id(id))
1060
+
1061
+ if url is not None:
1062
+ block["url"] = url
1063
+ if base64 is not None:
1064
+ block["base64"] = base64
1065
+ if file_id is not None:
1066
+ block["file_id"] = file_id
1067
+ if mime_type is not None:
1068
+ block["mime_type"] = mime_type
1069
+ if index is not None:
1070
+ block["index"] = index
1071
+
1072
+ return block
1073
+
1074
+
1075
+ def create_video_block(
1076
+ *,
1077
+ url: Optional[str] = None,
1078
+ base64: Optional[str] = None,
1079
+ file_id: Optional[str] = None,
1080
+ mime_type: Optional[str] = None,
1081
+ id: Optional[str] = None,
1082
+ index: Optional[int] = None,
1083
+ ) -> VideoContentBlock:
1084
+ """Create a ``VideoContentBlock``.
1085
+
1086
+ Args:
1087
+ url: URL of the video.
1088
+ base64: Base64-encoded video data.
1089
+ file_id: ID of the video file from a file storage system.
1090
+ mime_type: MIME type of the video. Required for base64 data.
1091
+ id: Content block identifier. Generated automatically if not provided.
1092
+ index: Index of block in aggregate response. Used during streaming.
1093
+
1094
+ Returns:
1095
+ A properly formatted ``VideoContentBlock``.
1096
+
1097
+ Raises:
1098
+ ValueError: If no video source is provided or if ``base64`` is used without
1099
+ ``mime_type``.
1100
+
1101
+ .. note::
1102
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1103
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1104
+
1105
+ """
1106
+ if not any([url, base64, file_id]):
1107
+ msg = "Must provide one of: url, base64, or file_id"
1108
+ raise ValueError(msg)
1109
+
1110
+ if base64 and not mime_type:
1111
+ msg = "mime_type is required when using base64 data"
1112
+ raise ValueError(msg)
1113
+
1114
+ block = VideoContentBlock(type="video", id=_ensure_id(id))
1115
+
1116
+ if url is not None:
1117
+ block["url"] = url
1118
+ if base64 is not None:
1119
+ block["base64"] = base64
1120
+ if file_id is not None:
1121
+ block["file_id"] = file_id
1122
+ if mime_type is not None:
1123
+ block["mime_type"] = mime_type
1124
+ if index is not None:
1125
+ block["index"] = index
1126
+
1127
+ return block
1128
+
1129
+
1130
+ def create_audio_block(
1131
+ *,
1132
+ url: Optional[str] = None,
1133
+ base64: Optional[str] = None,
1134
+ file_id: Optional[str] = None,
1135
+ mime_type: Optional[str] = None,
1136
+ id: Optional[str] = None,
1137
+ index: Optional[int] = None,
1138
+ ) -> AudioContentBlock:
1139
+ """Create an ``AudioContentBlock``.
1140
+
1141
+ Args:
1142
+ url: URL of the audio.
1143
+ base64: Base64-encoded audio data.
1144
+ file_id: ID of the audio file from a file storage system.
1145
+ mime_type: MIME type of the audio. Required for base64 data.
1146
+ id: Content block identifier. Generated automatically if not provided.
1147
+ index: Index of block in aggregate response. Used during streaming.
1148
+
1149
+ Returns:
1150
+ A properly formatted ``AudioContentBlock``.
1151
+
1152
+ Raises:
1153
+ ValueError: If no audio source is provided or if ``base64`` is used without
1154
+ ``mime_type``.
1155
+
1156
+ .. note::
1157
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1158
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1159
+
1160
+ """
1161
+ if not any([url, base64, file_id]):
1162
+ msg = "Must provide one of: url, base64, or file_id"
1163
+ raise ValueError(msg)
1164
+
1165
+ if base64 and not mime_type:
1166
+ msg = "mime_type is required when using base64 data"
1167
+ raise ValueError(msg)
1168
+
1169
+ block = AudioContentBlock(type="audio", id=_ensure_id(id))
1170
+
1171
+ if url is not None:
1172
+ block["url"] = url
1173
+ if base64 is not None:
1174
+ block["base64"] = base64
1175
+ if file_id is not None:
1176
+ block["file_id"] = file_id
1177
+ if mime_type is not None:
1178
+ block["mime_type"] = mime_type
1179
+ if index is not None:
1180
+ block["index"] = index
1181
+
1182
+ return block
1183
+
1184
+
1185
+ def create_file_block(
1186
+ *,
1187
+ url: Optional[str] = None,
1188
+ base64: Optional[str] = None,
1189
+ file_id: Optional[str] = None,
1190
+ mime_type: Optional[str] = None,
1191
+ id: Optional[str] = None,
1192
+ index: Optional[int] = None,
1193
+ ) -> FileContentBlock:
1194
+ """Create a ``FileContentBlock``.
1195
+
1196
+ Args:
1197
+ url: URL of the file.
1198
+ base64: Base64-encoded file data.
1199
+ file_id: ID of the file from a file storage system.
1200
+ mime_type: MIME type of the file. Required for base64 data.
1201
+ id: Content block identifier. Generated automatically if not provided.
1202
+ index: Index of block in aggregate response. Used during streaming.
1203
+
1204
+ Returns:
1205
+ A properly formatted ``FileContentBlock``.
1206
+
1207
+ Raises:
1208
+ ValueError: If no file source is provided or if ``base64`` is used without
1209
+ ``mime_type``.
1210
+
1211
+ .. note::
1212
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1213
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1214
+
1215
+ """
1216
+ if not any([url, base64, file_id]):
1217
+ msg = "Must provide one of: url, base64, or file_id"
1218
+ raise ValueError(msg)
1219
+
1220
+ if base64 and not mime_type:
1221
+ msg = "mime_type is required when using base64 data"
1222
+ raise ValueError(msg)
1223
+
1224
+ block = FileContentBlock(type="file", id=_ensure_id(id))
1225
+
1226
+ if url is not None:
1227
+ block["url"] = url
1228
+ if base64 is not None:
1229
+ block["base64"] = base64
1230
+ if file_id is not None:
1231
+ block["file_id"] = file_id
1232
+ if mime_type is not None:
1233
+ block["mime_type"] = mime_type
1234
+ if index is not None:
1235
+ block["index"] = index
1236
+
1237
+ return block
1238
+
1239
+
1240
+ def create_plaintext_block(
1241
+ text: str,
1242
+ *,
1243
+ url: Optional[str] = None,
1244
+ base64: Optional[str] = None,
1245
+ file_id: Optional[str] = None,
1246
+ title: Optional[str] = None,
1247
+ context: Optional[str] = None,
1248
+ id: Optional[str] = None,
1249
+ index: Optional[int] = None,
1250
+ ) -> PlainTextContentBlock:
1251
+ """Create a ``PlainTextContentBlock``.
1252
+
1253
+ Args:
1254
+ text: The plaintext content.
1255
+ url: URL of the plaintext file.
1256
+ base64: Base64-encoded plaintext data.
1257
+ file_id: ID of the plaintext file from a file storage system.
1258
+ title: Title of the text data.
1259
+ context: Context or description of the text content.
1260
+ id: Content block identifier. Generated automatically if not provided.
1261
+ index: Index of block in aggregate response. Used during streaming.
1262
+
1263
+ Returns:
1264
+ A properly formatted ``PlainTextContentBlock``.
1265
+
1266
+ .. note::
1267
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1268
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1269
+
1270
+ """
1271
+ block = PlainTextContentBlock(
1272
+ type="text-plain",
1273
+ mime_type="text/plain",
1274
+ text=text,
1275
+ id=_ensure_id(id),
1276
+ )
1277
+
1278
+ if url is not None:
1279
+ block["url"] = url
1280
+ if base64 is not None:
1281
+ block["base64"] = base64
1282
+ if file_id is not None:
1283
+ block["file_id"] = file_id
1284
+ if title is not None:
1285
+ block["title"] = title
1286
+ if context is not None:
1287
+ block["context"] = context
1288
+ if index is not None:
1289
+ block["index"] = index
1290
+
1291
+ return block
1292
+
1293
+
1294
+ def create_tool_call(
1295
+ name: str,
1296
+ args: dict[str, Any],
1297
+ *,
1298
+ id: Optional[str] = None,
1299
+ index: Optional[int] = None,
1300
+ ) -> ToolCall:
1301
+ """Create a ``ToolCall``.
1302
+
1303
+ Args:
1304
+ name: The name of the tool to be called.
1305
+ args: The arguments to the tool call.
1306
+ id: An identifier for the tool call. Generated automatically if not provided.
1307
+ index: Index of block in aggregate response. Used during streaming.
1308
+
1309
+ Returns:
1310
+ A properly formatted ``ToolCall``.
1311
+
1312
+ .. note::
1313
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1314
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1315
+
1316
+ """
1317
+ block = ToolCall(
1318
+ type="tool_call",
1319
+ name=name,
1320
+ args=args,
1321
+ id=_ensure_id(id),
1322
+ )
1323
+
1324
+ if index is not None:
1325
+ block["index"] = index
1326
+
1327
+ return block
1328
+
1329
+
1330
+ def create_reasoning_block(
1331
+ reasoning: Optional[str] = None,
1332
+ id: Optional[str] = None,
1333
+ index: Optional[int] = None,
1334
+ ) -> ReasoningContentBlock:
1335
+ """Create a ``ReasoningContentBlock``.
1336
+
1337
+ Args:
1338
+ reasoning: The reasoning text or thought summary.
1339
+ id: Content block identifier. Generated automatically if not provided.
1340
+ index: Index of block in aggregate response. Used during streaming.
1341
+
1342
+ Returns:
1343
+ A properly formatted ``ReasoningContentBlock``.
1344
+
1345
+ .. note::
1346
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1347
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1348
+
1349
+ """
1350
+ block = ReasoningContentBlock(
1351
+ type="reasoning",
1352
+ reasoning=reasoning or "",
1353
+ id=_ensure_id(id),
1354
+ )
1355
+
1356
+ if index is not None:
1357
+ block["index"] = index
1358
+
1359
+ return block
1360
+
1361
+
1362
+ def create_citation(
1363
+ *,
1364
+ url: Optional[str] = None,
1365
+ title: Optional[str] = None,
1366
+ start_index: Optional[int] = None,
1367
+ end_index: Optional[int] = None,
1368
+ cited_text: Optional[str] = None,
1369
+ id: Optional[str] = None,
1370
+ ) -> Citation:
1371
+ """Create a ``Citation``.
1372
+
1373
+ Args:
1374
+ url: URL of the document source.
1375
+ title: Source document title.
1376
+ start_index: Start index in the response text where citation applies.
1377
+ end_index: End index in the response text where citation applies.
1378
+ cited_text: Excerpt of source text being cited.
1379
+ id: Content block identifier. Generated automatically if not provided.
1380
+
1381
+ Returns:
1382
+ A properly formatted ``Citation``.
1383
+
1384
+ .. note::
1385
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1386
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1387
+
1388
+ """
1389
+ block = Citation(type="citation", id=_ensure_id(id))
1390
+
1391
+ if url is not None:
1392
+ block["url"] = url
1393
+ if title is not None:
1394
+ block["title"] = title
1395
+ if start_index is not None:
1396
+ block["start_index"] = start_index
1397
+ if end_index is not None:
1398
+ block["end_index"] = end_index
1399
+ if cited_text is not None:
1400
+ block["cited_text"] = cited_text
1401
+
1402
+ return block
1403
+
1404
+
1405
+ def create_non_standard_block(
1406
+ value: dict[str, Any],
1407
+ *,
1408
+ id: Optional[str] = None,
1409
+ index: Optional[int] = None,
1410
+ ) -> NonStandardContentBlock:
1411
+ """Create a ``NonStandardContentBlock``.
1412
+
1413
+ Args:
1414
+ value: Provider-specific data.
1415
+ id: Content block identifier. Generated automatically if not provided.
1416
+ index: Index of block in aggregate response. Used during streaming.
1417
+
1418
+ Returns:
1419
+ A properly formatted ``NonStandardContentBlock``.
1420
+
1421
+ .. note::
1422
+ The ``id`` is generated automatically if not provided, using a UUID4 format
1423
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
1424
+
1425
+ """
1426
+ block = NonStandardContentBlock(
1427
+ type="non_standard",
1428
+ value=value,
1429
+ id=_ensure_id(id),
1430
+ )
1431
+
1432
+ if index is not None:
1433
+ block["index"] = index
1434
+
1435
+ return block