langchain-core 1.0.0rc3__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic.
- langchain_core/agents.py +2 -4
- langchain_core/caches.py +16 -7
- langchain_core/callbacks/base.py +0 -4
- langchain_core/callbacks/manager.py +0 -11
- langchain_core/chat_history.py +5 -5
- langchain_core/document_loaders/base.py +6 -4
- langchain_core/document_loaders/blob_loaders.py +1 -1
- langchain_core/document_loaders/langsmith.py +9 -13
- langchain_core/documents/__init__.py +24 -3
- langchain_core/documents/base.py +72 -61
- langchain_core/documents/compressor.py +6 -6
- langchain_core/documents/transformers.py +6 -6
- langchain_core/embeddings/fake.py +2 -2
- langchain_core/example_selectors/semantic_similarity.py +7 -7
- langchain_core/exceptions.py +2 -2
- langchain_core/indexing/__init__.py +1 -1
- langchain_core/indexing/api.py +62 -62
- langchain_core/indexing/base.py +20 -22
- langchain_core/indexing/in_memory.py +2 -4
- langchain_core/language_models/__init__.py +6 -5
- langchain_core/language_models/base.py +7 -8
- langchain_core/language_models/chat_models.py +84 -78
- langchain_core/language_models/fake_chat_models.py +1 -1
- langchain_core/language_models/llms.py +20 -18
- langchain_core/load/dump.py +6 -8
- langchain_core/load/serializable.py +4 -1
- langchain_core/messages/__init__.py +9 -0
- langchain_core/messages/ai.py +11 -7
- langchain_core/messages/base.py +4 -0
- langchain_core/messages/block_translators/google_genai.py +5 -3
- langchain_core/messages/content.py +4 -4
- langchain_core/messages/utils.py +17 -17
- langchain_core/output_parsers/__init__.py +17 -1
- langchain_core/output_parsers/base.py +3 -0
- langchain_core/output_parsers/format_instructions.py +9 -4
- langchain_core/output_parsers/json.py +5 -2
- langchain_core/output_parsers/list.py +16 -16
- langchain_core/output_parsers/openai_tools.py +2 -2
- langchain_core/output_parsers/pydantic.py +1 -1
- langchain_core/output_parsers/string.py +3 -3
- langchain_core/output_parsers/xml.py +28 -25
- langchain_core/outputs/generation.py +2 -3
- langchain_core/prompt_values.py +0 -6
- langchain_core/prompts/base.py +5 -3
- langchain_core/prompts/chat.py +60 -52
- langchain_core/prompts/string.py +5 -2
- langchain_core/prompts/structured.py +12 -8
- langchain_core/rate_limiters.py +1 -3
- langchain_core/retrievers.py +41 -37
- langchain_core/runnables/base.py +25 -29
- langchain_core/runnables/branch.py +9 -9
- langchain_core/runnables/config.py +2 -4
- langchain_core/runnables/configurable.py +3 -3
- langchain_core/runnables/fallbacks.py +1 -1
- langchain_core/runnables/graph.py +7 -3
- langchain_core/runnables/retry.py +1 -1
- langchain_core/runnables/schema.py +2 -5
- langchain_core/runnables/utils.py +3 -3
- langchain_core/stores.py +4 -6
- langchain_core/tools/base.py +68 -14
- langchain_core/tools/convert.py +8 -7
- langchain_core/tools/retriever.py +6 -5
- langchain_core/tools/structured.py +7 -5
- langchain_core/tracers/event_stream.py +4 -1
- langchain_core/tracers/log_stream.py +6 -3
- langchain_core/utils/function_calling.py +8 -0
- langchain_core/utils/json_schema.py +1 -1
- langchain_core/utils/strings.py +1 -4
- langchain_core/utils/utils.py +12 -5
- langchain_core/vectorstores/base.py +130 -130
- langchain_core/vectorstores/in_memory.py +4 -4
- langchain_core/vectorstores/utils.py +1 -1
- langchain_core/version.py +1 -1
- {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/METADATA +8 -7
- {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/RECORD +76 -76
- {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/WHEEL +0 -0
langchain_core/messages/utils.py
CHANGED

@@ -86,7 +86,7 @@ AnyMessage = Annotated[
     Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
     Field(discriminator=Discriminator(_get_type)),
 ]
-"""
+"""A type representing any defined `Message` or `MessageChunk` type."""


 def get_buffer_string(

@@ -439,8 +439,8 @@ def filter_messages(
         exclude_ids: Message IDs to exclude.
         exclude_tool_calls: Tool call IDs to exclude.
             Can be one of the following:
-            - `True`:
-
+            - `True`: All `AIMessage` objects with tool calls and all `ToolMessage`
+              objects will be excluded.
             - a sequence of tool call IDs to exclude:
                 - `ToolMessage` objects with the corresponding tool call ID will be
                   excluded.

@@ -1025,18 +1025,18 @@ def convert_to_openai_messages(
         messages: Message-like object or iterable of objects whose contents are
             in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
         text_format: How to format string or text block contents:
-
-
-
-
-
-
-
-
-
-
-        include_id: Whether to include message
-
+            - `'string'`:
+                If a message has a string content, this is left as a string. If
+                a message has content blocks that are all of type `'text'`, these
+                are joined with a newline to make a single string. If a message has
+                content blocks and at least one isn't of type `'text'`, then
+                all blocks are left as dicts.
+            - `'block'`:
+                If a message has a string content, this is turned into a list
+                with a single content block of type `'text'`. If a message has
+                content blocks these are left as is.
+        include_id: Whether to include message IDs in the openai messages, if they
+            are present in the source messages.

     Raises:
         ValueError: if an unrecognized `text_format` is specified, or if a message

@@ -1678,11 +1678,11 @@ def count_tokens_approximately(
         messages: List of messages to count tokens for.
         chars_per_token: Number of characters per token to use for the approximation.
             One token corresponds to ~4 chars for common English text.
-            You can also specify float values for more fine-grained control.
+            You can also specify `float` values for more fine-grained control.
             [See more here](https://platform.openai.com/tokenizer).
         extra_tokens_per_message: Number of extra tokens to add per message, e.g.
             special tokens, including beginning/end of message.
-            You can also specify float values for more fine-grained control.
+            You can also specify `float` values for more fine-grained control.
            [See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
         count_name: Whether to include message names in the count.
             Enabled by default.
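A minimal sketch of the three helpers documented in the hunks above; the conversation content is made up for illustration.

```python
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langchain_core.messages.utils import (
    convert_to_openai_messages,
    count_tokens_approximately,
    filter_messages,
)

history = [
    HumanMessage("What is 2 + 2?"),
    AIMessage("", tool_calls=[{"name": "add", "args": {"a": 2, "b": 2}, "id": "c1"}]),
    ToolMessage("4", tool_call_id="c1"),
    AIMessage("2 + 2 = 4"),
]

# exclude_tool_calls=True drops the AIMessage with tool calls and all ToolMessages.
chat_only = filter_messages(history, exclude_tool_calls=True)

# Convert to OpenAI dict format; text_format="string" keeps text content as strings.
oai_messages = convert_to_openai_messages(chat_only, text_format="string")

# Character-count heuristic, ~4 characters per token by default.
approx_tokens = count_tokens_approximately(chat_only)
```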
langchain_core/output_parsers/__init__.py
CHANGED

@@ -1,4 +1,20 @@
-"""
+"""`OutputParser` classes parse the output of an LLM call into structured data.
+
+!!! tip "Structured output"
+
+    Output parsers emerged as an early solution to the challenge of obtaining structured
+    output from LLMs.
+
+    Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
+    natively. In such cases, using output parsers may be unnecessary, and you should
+    leverage the model's built-in capabilities for structured output. Refer to the
+    [documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
+    for guidance on how to achieve structured output directly.
+
+    Output parsers remain valuable when working with models that do not support
+    structured output natively, or when you require additional processing or validation
+    of the model's output beyond its inherent capabilities.
+"""

 from typing import TYPE_CHECKING
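As the new module docstring advises, native structured output is preferred where available. A hedged sketch of both paths; `model` stands for any chat model and is not part of this diff.

```python
from pydantic import BaseModel

from langchain_core.output_parsers import JsonOutputParser


class Joke(BaseModel):
    setup: str
    punchline: str


# Native structured output, when the model supports it (preferred):
#     structured_model = model.with_structured_output(Joke)
# Parser-based fallback for models without native support:
parser = JsonOutputParser(pydantic_object=Joke)
print(parser.parse('{"setup": "Why?", "punchline": "Because."}'))
# -> {'setup': 'Why?', 'punchline': 'Because.'} (a dict, not a Joke instance)
```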
langchain_core/output_parsers/format_instructions.py
CHANGED

@@ -1,11 +1,16 @@
 """Format instructions."""

-JSON_FORMAT_INSTRUCTIONS = """
+JSON_FORMAT_INSTRUCTIONS = """STRICT OUTPUT FORMAT:
+- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
+- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
+- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
+- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.

-
-the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
+The output should be formatted as a JSON instance that conforms to the JSON schema below.

-
+As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
+
+Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output):
 ```
 {schema}
 ```"""  # noqa: E501
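For context, `JSON_FORMAT_INSTRUCTIONS` is a plain Python format string; a small sketch of how it renders (the example schema is made up).

```python
import json

from langchain_core.output_parsers.format_instructions import (
    JSON_FORMAT_INSTRUCTIONS,
)

schema = {"properties": {"answer": {"type": "string"}}, "required": ["answer"]}

# Doubled braces in the template are literal { and }; only {schema} is filled in.
print(JSON_FORMAT_INSTRUCTIONS.format(schema=json.dumps(schema)))
```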
langchain_core/output_parsers/json.py
CHANGED

@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
 class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
     """Parse the output of an LLM call to a JSON object.

+    Probably the most reliable output parser for getting structured data that does *not*
+    use function calling.
+
     When used in streaming mode, it will yield partial JSON objects containing
     all the keys that have been returned so far.

-    In streaming, if `diff` is set to `True`, yields JSONPatch operations
-
+    In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
+    difference between the previous and the current object.
     """

     pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None  # type: ignore[valid-type]
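A sketch of the streaming behavior described above, driving the parser directly with hand-built message chunks instead of a live model; the chunk boundaries are arbitrary.

```python
from langchain_core.messages import AIMessageChunk
from langchain_core.output_parsers import JsonOutputParser

parser = JsonOutputParser()
chunks = ['{"setup": "Why did', ' the...", "punchline"', ': "Because..."}']

# Each yielded state is a partial object holding every key completed so far.
for state in parser.transform(AIMessageChunk(content=c) for c in chunks):
    print(state)
# e.g. {'setup': 'Why did'} -> ... -> {'setup': '...', 'punchline': 'Because...'}
```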
langchain_core/output_parsers/list.py
CHANGED

@@ -41,7 +41,7 @@ def droplastn(


 class ListOutputParser(BaseTransformOutputParser[list[str]]):
-    """Parse the output of
+    """Parse the output of a model to a list."""

     @property
     def _type(self) -> str:

@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
         buffer = ""
         for chunk in input:
             if isinstance(chunk, BaseMessage):
-                #
+                # Extract text
                 chunk_content = chunk.content
                 if not isinstance(chunk_content, str):
                     continue
                 buffer += chunk_content
             else:
-                #
+                # Add current chunk to buffer
                 buffer += chunk
-            #
+            # Parse buffer into a list of parts
             try:
                 done_idx = 0
-                #
+                # Yield only complete parts
                 for m in droplastn(self.parse_iter(buffer), 1):
                     done_idx = m.end()
                     yield [m.group(1)]
                 buffer = buffer[done_idx:]
             except NotImplementedError:
                 parts = self.parse(buffer)
-                #
+                # Yield only complete parts
                 if len(parts) > 1:
                     for part in parts[:-1]:
                         yield [part]
                     buffer = parts[-1]
-        #
+        # Yield the last part
         for part in self.parse(buffer):
             yield [part]

@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
         buffer = ""
         async for chunk in input:
             if isinstance(chunk, BaseMessage):
-                #
+                # Extract text
                 chunk_content = chunk.content
                 if not isinstance(chunk_content, str):
                     continue
                 buffer += chunk_content
             else:
-                #
+                # Add current chunk to buffer
                 buffer += chunk
-            #
+            # Parse buffer into a list of parts
             try:
                 done_idx = 0
-                #
+                # Yield only complete parts
                 for m in droplastn(self.parse_iter(buffer), 1):
                     done_idx = m.end()
                     yield [m.group(1)]
                 buffer = buffer[done_idx:]
             except NotImplementedError:
                 parts = self.parse(buffer)
-                #
+                # Yield only complete parts
                 if len(parts) > 1:
                     for part in parts[:-1]:
                         yield [part]
                     buffer = parts[-1]
-        #
+        # Yield the last part
         for part in self.parse(buffer):
             yield [part]


 class CommaSeparatedListOutputParser(ListOutputParser):
-    """Parse the output of
+    """Parse the output of a model to a comma-separated list."""

     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """Return True as this class is serializable."""
+        """Return `True` as this class is serializable."""
         return True

     @classmethod

@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
             )
             return [item for sublist in reader for item in sublist]
         except csv.Error:
-            #
+            # Keep old logic for backup
             return [part.strip() for part in text.split(",")]

     @property
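A quick illustration of the comma-separated variant; note that `parse` falls back to a plain `split(",")` when the CSV reader errors, per the restored comment above.

```python
from langchain_core.output_parsers import CommaSeparatedListOutputParser

parser = CommaSeparatedListOutputParser()
print(parser.parse("red, green, blue"))  # ['red', 'green', 'blue']
print(parser.get_format_instructions())  # tells the model to reply as CSV
```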
langchain_core/output_parsers/openai_tools.py
CHANGED

@@ -224,7 +224,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
             result: The result of the LLM call.
             partial: Whether to parse partial JSON.
                 If `True`, the output will be a JSON object containing
-
+                all the keys that have been returned so far.
                 If `False`, the output will be the full JSON object.

         Raises:

@@ -307,7 +307,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
             result: The result of the LLM call.
             partial: Whether to parse partial JSON.
                 If `True`, the output will be a JSON object containing
-
+                all the keys that have been returned so far.
                 If `False`, the output will be the full JSON object.

         Returns:
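A sketch of `PydanticToolsParser` applied to a hand-built `AIMessage` carrying a single tool call; names and arguments are illustrative.

```python
from pydantic import BaseModel

from langchain_core.messages import AIMessage
from langchain_core.output_parsers.openai_tools import PydanticToolsParser


class Add(BaseModel):
    a: int
    b: int


msg = AIMessage(
    content="",
    tool_calls=[{"name": "Add", "args": {"a": 2, "b": 2}, "id": "call_1"}],
)
parser = PydanticToolsParser(tools=[Add])
print(parser.invoke(msg))  # [Add(a=2, b=2)]
```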
langchain_core/output_parsers/pydantic.py
CHANGED

@@ -86,7 +86,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
             The format instructions for the JSON output.
         """
         # Copy schema to avoid altering original Pydantic schema.
-        schema = dict(self.pydantic_object
+        schema = dict(self._get_schema(self.pydantic_object).items())

         # Remove extraneous fields.
         reduced_schema = schema
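The hunk above sits inside `get_format_instructions`, which copies the model's JSON schema, strips extraneous top-level fields, and injects it into the format-instruction template. A round-trip sketch:

```python
from pydantic import BaseModel, Field

from langchain_core.output_parsers import PydanticOutputParser


class Person(BaseModel):
    name: str = Field(description="the person's name")
    age: int


parser = PydanticOutputParser(pydantic_object=Person)
print(parser.get_format_instructions())           # schema-bearing instructions
print(parser.parse('{"name": "Ada", "age": 36}'))  # Person(name='Ada', age=36)
```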
langchain_core/output_parsers/string.py
CHANGED

@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser


 class StrOutputParser(BaseTransformOutputParser[str]):
-    """OutputParser that parses LLMResult into the top likely string."""
+    """OutputParser that parses `LLMResult` into the top likely string."""

     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """StrOutputParser is serializable.
+        """`StrOutputParser` is serializable.

         Returns:
-            True
+            `True`
         """
         return True
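`StrOutputParser` in brief: it extracts the text of a message and passes raw strings through unchanged.

```python
from langchain_core.messages import AIMessage
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()
print(parser.invoke(AIMessage(content="Hello!")))  # Hello!
print(parser.invoke("already a string"))           # already a string
```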
langchain_core/output_parsers/xml.py
CHANGED

@@ -43,19 +43,19 @@ class _StreamingParser:
     """Streaming parser for XML.

     This implementation is pulled into a class to avoid implementation
-    drift between transform and atransform of the XMLOutputParser
+    drift between transform and atransform of the `XMLOutputParser`.
     """

     def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
         """Initialize the streaming parser.

         Args:
-            parser: Parser to use for XML parsing. Can be either 'defusedxml' or
-
+            parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
+                `'xml'`. See documentation in `XMLOutputParser` for more information.

         Raises:
-            ImportError: If defusedxml is not installed and the defusedxml
-
+            ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+                requested.
         """
         if parser == "defusedxml":
             if not _HAS_DEFUSEDXML:

@@ -79,10 +79,10 @@ class _StreamingParser:
         """Parse a chunk of text.

         Args:
-            chunk: A chunk of text to parse. This can be a
+            chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.

         Yields:
-            A
+            A `dict` representing the parsed XML element.

         Raises:
             xml.etree.ElementTree.ParseError: If the XML is not well-formed.

@@ -147,46 +147,49 @@ class _StreamingParser:


 class XMLOutputParser(BaseTransformOutputParser):
-    """Parse an output using xml format.
+    """Parse an output using xml format.
+
+    Returns a dictionary of tags.
+    """

     tags: list[str] | None = None
     """Tags to tell the LLM to expect in the XML output.

     Note this may not be perfect depending on the LLM implementation.

-    For example, with tags=["foo", "bar", "baz"]
+    For example, with `tags=["foo", "bar", "baz"]`:

     1. A well-formatted XML instance:
-
+        `"<foo>\n    <bar>\n        <baz></baz>\n    </bar>\n</foo>"`

     2. A badly-formatted XML instance (missing closing tag for 'bar'):
-
+        `"<foo>\n    <bar>\n    </foo>"`

     3. A badly-formatted XML instance (unexpected 'tag' element):
-
+        `"<foo>\n    <tag>\n    </tag>\n</foo>"`
     """
     encoding_matcher: re.Pattern = re.compile(
         r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
     )
     parser: Literal["defusedxml", "xml"] = "defusedxml"
-    """Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'
+    """Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.

-    * 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
-
-
-
-    * 'xml' is the standard library parser.
+    * `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
+      present in some distributions of Python's standard library xml.
+      `defusedxml` is a wrapper around the standard library parser that
+      sets up the parser with secure defaults.
+    * `'xml'` is the standard library parser.

-    Use `xml` only if you are sure that your distribution of the standard library
-
+    Use `xml` only if you are sure that your distribution of the standard library is not
+    vulnerable to XML vulnerabilities.

     Please review the following resources for more information:

     * https://docs.python.org/3/library/xml.html#xml-vulnerabilities
     * https://github.com/tiran/defusedxml

-    The standard library relies on libexpat
-
+    The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
+    for parsing XML.
     """

     def get_format_instructions(self) -> str:

@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
             text: The output of an LLM call.

         Returns:
-            A
+            A `dict` representing the parsed XML.

         Raises:
             OutputParserException: If the XML is not well-formed.
-            ImportError: If
-
+            ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+                requested.
         """
         # Try to find XML string within triple backticks
         # Imports are temporarily placed here to avoid issue with caching on CI
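A sketch of the tag-dictionary output described above; the exact nesting shape is indicative only, and the default parser requires the `defusedxml` package to be installed.

```python
from langchain_core.output_parsers import XMLOutputParser

# Default parser="defusedxml"; pass parser="xml" only if your stdlib is safe.
parser = XMLOutputParser(tags=["movies", "movie"])
doc = "<movies>\n  <movie>Alien</movie>\n  <movie>Blade Runner</movie>\n</movies>"
print(parser.parse(doc))
# Approximately: {'movies': [{'movie': 'Alien'}, {'movie': 'Blade Runner'}]}
```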
langchain_core/outputs/generation.py
CHANGED

@@ -11,9 +11,8 @@ from langchain_core.utils._merge import merge_dicts
 class Generation(Serializable):
     """A single text generation output.

-    Generation represents the response from an
-
-    generates regular text (not chat messages).
+    Generation represents the response from an "old-fashioned" LLM (string-in,
+    string-out) that generates regular text (not chat messages).

     This model is used internally by chat model and will eventually
     be mapped to a more general `LLMResult` object, and then projected into
langchain_core/prompt_values.py
CHANGED

@@ -37,8 +37,6 @@ class PromptValue(Serializable, ABC):
     def get_lc_namespace(cls) -> list[str]:
         """Get the namespace of the LangChain object.

-        This is used to determine the namespace of the object when serializing.
-
         Returns:
             `["langchain", "schema", "prompt"]`
         """

@@ -64,8 +62,6 @@ class StringPromptValue(PromptValue):
     def get_lc_namespace(cls) -> list[str]:
         """Get the namespace of the LangChain object.

-        This is used to determine the namespace of the object when serializing.
-
         Returns:
             `["langchain", "prompts", "base"]`
         """

@@ -101,8 +97,6 @@ class ChatPromptValue(PromptValue):
     def get_lc_namespace(cls) -> list[str]:
         """Get the namespace of the LangChain object.

-        This is used to determine the namespace of the object when serializing.
-
         Returns:
             `["langchain", "prompts", "chat"]`
         """
langchain_core/prompts/base.py
CHANGED

@@ -48,11 +48,13 @@ class BasePromptTemplate(
     """A list of the names of the variables whose values are required as inputs to the
     prompt."""
     optional_variables: list[str] = Field(default=[])
-    """
-
-
+    """A list of the names of the variables for placeholder or `MessagePlaceholder` that
+    are optional.
+
+    These variables are auto inferred from the prompt and user need not provide them."""
     input_types: typing.Dict[str, Any] = Field(default_factory=dict, exclude=True)  # noqa: UP006
     """A dictionary of the types of the variables the prompt template expects.
+
     If not provided, all variables are assumed to be strings."""
     output_parser: BaseOutputParser | None = None
     """How to parse the output of calling an LLM on this formatted prompt."""
langchain_core/prompts/chat.py
CHANGED

@@ -776,42 +776,36 @@ class ChatPromptTemplate(BaseChatPromptTemplate):

     Use to create flexible templated prompts for chat models.

-
-
-
-
-
-
-
-
-
-
-
-            ("system", "You are a helpful AI bot. Your name is {name}."),
-            ("human", "Hello, how are you doing?"),
-            ("ai", "I'm doing well, thanks!"),
-            ("human", "{user_input}"),
-        ]
-    )
-
-    prompt_value = template.invoke(
-        {
-            "name": "Bob",
-            "user_input": "What is your name?",
-        }
-    )
-    # Output:
-    # ChatPromptValue(
-    #     messages=[
-    #         SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
-    #         HumanMessage(content='Hello, how are you doing?'),
-    #         AIMessage(content="I'm doing well, thanks!"),
-    #         HumanMessage(content='What is your name?')
-    #     ]
-    # )
-    ```
+    ```python
+    from langchain_core.prompts import ChatPromptTemplate
+
+    template = ChatPromptTemplate(
+        [
+            ("system", "You are a helpful AI bot. Your name is {name}."),
+            ("human", "Hello, how are you doing?"),
+            ("ai", "I'm doing well, thanks!"),
+            ("human", "{user_input}"),
+        ]
+    )

-
+    prompt_value = template.invoke(
+        {
+            "name": "Bob",
+            "user_input": "What is your name?",
+        }
+    )
+    # Output:
+    # ChatPromptValue(
+    #     messages=[
+    #         SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
+    #         HumanMessage(content='Hello, how are you doing?'),
+    #         AIMessage(content="I'm doing well, thanks!"),
+    #         HumanMessage(content='What is your name?')
+    #     ]
+    # )
+    ```
+
+    !!! note "Messages Placeholder"

     ```python
     # In addition to Human/AI/Tool/Function messages,

@@ -852,13 +846,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
     # )
     ```

-    Single-variable template
+    !!! note "Single-variable template"

     If your prompt has only a single input variable (i.e., 1 instance of "{variable_nams}"),
     and you invoke the template with a non-dict object, the prompt template will
     inject the provided argument into that variable location.

-
     ```python
     from langchain_core.prompts import ChatPromptTemplate

@@ -898,25 +891,35 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
         """Create a chat prompt template from a variety of message formats.

         Args:
-            messages:
+            messages: Sequence of message representations.
+
                 A message can be represented using the following formats:
-
-
-
-
-
+
+                1. `BaseMessagePromptTemplate`
+                2. `BaseMessage`
+                3. 2-tuple of `(message type, template)`; e.g.,
+                   `("human", "{user_input}")`
+                4. 2-tuple of `(message class, template)`
+                5. A string which is shorthand for `("human", template)`; e.g.,
+                   `"{user_input}"`
+            template_format: Format of the template.
             input_variables: A list of the names of the variables whose values are
                 required as inputs to the prompt.
             optional_variables: A list of the names of the variables for placeholder
                 or MessagePlaceholder that are optional.
+
                 These variables are auto inferred from the prompt and user need not
                 provide them.
             partial_variables: A dictionary of the partial variables the prompt
-                template carries.
-
+                template carries.
+
+                Partial variables populate the template so that you don't need to pass
+                them in every time you call the prompt.
             validate_template: Whether to validate the template.
             input_types: A dictionary of the types of the variables the prompt template
-                expects.
+                expects.
+
+                If not provided, all variables are assumed to be strings.

         Examples:
             Instantiation from a list of message templates:

@@ -1121,12 +1124,17 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
             )
         ```
         Args:
-            messages:
+            messages: Sequence of message representations.
+
                 A message can be represented using the following formats:
-
-
-
-
+
+                1. `BaseMessagePromptTemplate`
+                2. `BaseMessage`
+                3. 2-tuple of `(message type, template)`; e.g.,
+                   `("human", "{user_input}")`
+                4. 2-tuple of `(message class, template)`
+                5. A string which is shorthand for `("human", template)`; e.g.,
+                   `"{user_input}"`
             template_format: format of the template.

         Returns:

@@ -1238,7 +1246,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
         """Extend the chat template with a sequence of messages.

         Args:
-            messages:
+            messages: Sequence of message representations to append.
         """
         self.messages.extend(
             [_convert_to_message_template(message) for message in messages]
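A hedged sketch combining two of the `from_messages` formats enumerated above with a partial variable; the names are illustrative.

```python
from langchain_core.prompts import ChatPromptTemplate

template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a {adjective} assistant named {name}."),  # format 3
        "{user_input}",  # format 5: shorthand for ("human", "{user_input}")
    ]
)

# A partial variable is bound once and need not be passed on each invoke.
template = template.partial(adjective="helpful")
prompt_value = template.invoke({"name": "Bob", "user_input": "Hello!"})
```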
langchain_core/prompts/string.py
CHANGED

@@ -122,13 +122,16 @@ def mustache_formatter(template: str, /, **kwargs: Any) -> str:
 def mustache_template_vars(
     template: str,
 ) -> set[str]:
-    """Get the variables from a mustache template.
+    """Get the top-level variables from a mustache template.
+
+    For nested variables like `{{person.name}}`, only the top-level
+    key (`person`) is returned.

     Args:
         template: The template string.

     Returns:
-
+        The top-level variables from the template.
     """
     variables: set[str] = set()
     section_depth = 0
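A sketch of the top-level-only behavior now documented for `mustache_template_vars`:

```python
from langchain_core.prompts.string import mustache_template_vars

template = "Hello {{person.name}}! Welcome to {{city}}."
print(mustache_template_vars(template))  # {'person', 'city'} (top-level keys only)
```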