langchain-core 1.0.0a6__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +23 -26
- langchain_core/_api/deprecation.py +51 -64
- langchain_core/_api/path.py +3 -6
- langchain_core/_import_utils.py +3 -4
- langchain_core/agents.py +55 -48
- langchain_core/caches.py +65 -66
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +321 -336
- langchain_core/callbacks/file.py +44 -44
- langchain_core/callbacks/manager.py +454 -514
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +32 -32
- langchain_core/callbacks/usage.py +60 -57
- langchain_core/chat_history.py +53 -68
- langchain_core/document_loaders/base.py +27 -25
- langchain_core/document_loaders/blob_loaders.py +1 -1
- langchain_core/document_loaders/langsmith.py +44 -48
- langchain_core/documents/__init__.py +23 -3
- langchain_core/documents/base.py +102 -94
- langchain_core/documents/compressor.py +10 -10
- langchain_core/documents/transformers.py +34 -35
- langchain_core/embeddings/fake.py +50 -54
- langchain_core/example_selectors/length_based.py +2 -2
- langchain_core/example_selectors/semantic_similarity.py +28 -32
- langchain_core/exceptions.py +21 -20
- langchain_core/globals.py +3 -151
- langchain_core/indexing/__init__.py +1 -1
- langchain_core/indexing/api.py +121 -126
- langchain_core/indexing/base.py +73 -75
- langchain_core/indexing/in_memory.py +4 -6
- langchain_core/language_models/__init__.py +14 -29
- langchain_core/language_models/_utils.py +58 -61
- langchain_core/language_models/base.py +82 -172
- langchain_core/language_models/chat_models.py +329 -402
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +42 -36
- langchain_core/language_models/llms.py +189 -269
- langchain_core/load/dump.py +9 -12
- langchain_core/load/load.py +18 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +42 -40
- langchain_core/messages/__init__.py +10 -16
- langchain_core/messages/ai.py +148 -148
- langchain_core/messages/base.py +53 -51
- langchain_core/messages/block_translators/__init__.py +19 -22
- langchain_core/messages/block_translators/anthropic.py +6 -6
- langchain_core/messages/block_translators/bedrock_converse.py +5 -5
- langchain_core/messages/block_translators/google_genai.py +10 -7
- langchain_core/messages/block_translators/google_vertexai.py +4 -32
- langchain_core/messages/block_translators/groq.py +117 -21
- langchain_core/messages/block_translators/langchain_v0.py +5 -5
- langchain_core/messages/block_translators/openai.py +11 -11
- langchain_core/messages/chat.py +2 -6
- langchain_core/messages/content.py +339 -330
- langchain_core/messages/function.py +6 -10
- langchain_core/messages/human.py +24 -31
- langchain_core/messages/modifier.py +2 -2
- langchain_core/messages/system.py +19 -29
- langchain_core/messages/tool.py +74 -90
- langchain_core/messages/utils.py +484 -510
- langchain_core/output_parsers/__init__.py +13 -10
- langchain_core/output_parsers/base.py +61 -61
- langchain_core/output_parsers/format_instructions.py +9 -4
- langchain_core/output_parsers/json.py +12 -10
- langchain_core/output_parsers/list.py +21 -23
- langchain_core/output_parsers/openai_functions.py +49 -47
- langchain_core/output_parsers/openai_tools.py +30 -23
- langchain_core/output_parsers/pydantic.py +13 -14
- langchain_core/output_parsers/string.py +5 -5
- langchain_core/output_parsers/transform.py +15 -17
- langchain_core/output_parsers/xml.py +35 -34
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +18 -18
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +16 -16
- langchain_core/outputs/llm_result.py +10 -10
- langchain_core/prompt_values.py +13 -19
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +81 -86
- langchain_core/prompts/chat.py +308 -351
- langchain_core/prompts/dict.py +6 -6
- langchain_core/prompts/few_shot.py +81 -88
- langchain_core/prompts/few_shot_with_templates.py +11 -13
- langchain_core/prompts/image.py +12 -14
- langchain_core/prompts/loading.py +4 -6
- langchain_core/prompts/message.py +7 -7
- langchain_core/prompts/prompt.py +24 -39
- langchain_core/prompts/string.py +26 -10
- langchain_core/prompts/structured.py +49 -53
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +61 -198
- langchain_core/runnables/base.py +1551 -1656
- langchain_core/runnables/branch.py +68 -70
- langchain_core/runnables/config.py +72 -89
- langchain_core/runnables/configurable.py +145 -161
- langchain_core/runnables/fallbacks.py +102 -96
- langchain_core/runnables/graph.py +91 -97
- langchain_core/runnables/graph_ascii.py +27 -28
- langchain_core/runnables/graph_mermaid.py +42 -51
- langchain_core/runnables/graph_png.py +43 -16
- langchain_core/runnables/history.py +175 -177
- langchain_core/runnables/passthrough.py +151 -167
- langchain_core/runnables/retry.py +46 -51
- langchain_core/runnables/router.py +30 -35
- langchain_core/runnables/schema.py +75 -80
- langchain_core/runnables/utils.py +60 -67
- langchain_core/stores.py +85 -121
- langchain_core/structured_query.py +8 -8
- langchain_core/sys_info.py +29 -29
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +306 -245
- langchain_core/tools/convert.py +160 -155
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +12 -11
- langchain_core/tools/simple.py +19 -24
- langchain_core/tools/structured.py +32 -39
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/base.py +97 -99
- langchain_core/tracers/context.py +29 -52
- langchain_core/tracers/core.py +49 -53
- langchain_core/tracers/evaluation.py +11 -11
- langchain_core/tracers/event_stream.py +65 -64
- langchain_core/tracers/langchain.py +21 -21
- langchain_core/tracers/log_stream.py +45 -45
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +16 -16
- langchain_core/tracers/run_collector.py +2 -4
- langchain_core/tracers/schemas.py +0 -129
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +2 -2
- langchain_core/utils/aiter.py +57 -61
- langchain_core/utils/env.py +9 -9
- langchain_core/utils/function_calling.py +94 -188
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +9 -6
- langchain_core/utils/interactive_env.py +1 -1
- langchain_core/utils/iter.py +36 -40
- langchain_core/utils/json.py +4 -3
- langchain_core/utils/json_schema.py +9 -9
- langchain_core/utils/mustache.py +8 -10
- langchain_core/utils/pydantic.py +35 -37
- langchain_core/utils/strings.py +6 -9
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +66 -62
- langchain_core/vectorstores/base.py +182 -216
- langchain_core/vectorstores/in_memory.py +101 -176
- langchain_core/vectorstores/utils.py +5 -5
- langchain_core/version.py +1 -1
- langchain_core-1.0.4.dist-info/METADATA +69 -0
- langchain_core-1.0.4.dist-info/RECORD +172 -0
- {langchain_core-1.0.0a6.dist-info → langchain_core-1.0.4.dist-info}/WHEEL +1 -1
- langchain_core/memory.py +0 -120
- langchain_core/messages/block_translators/ollama.py +0 -47
- langchain_core/prompts/pipeline.py +0 -138
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -31
- langchain_core/utils/loading.py +0 -35
- langchain_core-1.0.0a6.dist-info/METADATA +0 -67
- langchain_core-1.0.0a6.dist-info/RECORD +0 -181
- langchain_core-1.0.0a6.dist-info/entry_points.txt +0 -4
|
@@ -1,17 +1,20 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""`OutputParser` classes parse the output of an LLM call into structured data.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
!!! tip "Structured output"
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Output parsers emerged as an early solution to the challenge of obtaining structured
|
|
6
|
+
output from LLMs.
|
|
6
7
|
|
|
7
|
-
|
|
8
|
+
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
|
|
9
|
+
natively. In such cases, using output parsers may be unnecessary, and you should
|
|
10
|
+
leverage the model's built-in capabilities for structured output. Refer to the
|
|
11
|
+
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
|
|
12
|
+
for guidance on how to achieve structured output directly.
|
|
8
13
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
Serializable, Generation, PromptValue
|
|
14
|
-
""" # noqa: E501
|
|
14
|
+
Output parsers remain valuable when working with models that do not support
|
|
15
|
+
structured output natively, or when you require additional processing or validation
|
|
16
|
+
of the model's output beyond its inherent capabilities.
|
|
17
|
+
"""
|
|
15
18
|
|
|
16
19
|
from typing import TYPE_CHECKING
|
|
17
20
|
|
|
@@ -8,9 +8,7 @@ from typing import (
|
|
|
8
8
|
TYPE_CHECKING,
|
|
9
9
|
Any,
|
|
10
10
|
Generic,
|
|
11
|
-
Optional,
|
|
12
11
|
TypeVar,
|
|
13
|
-
Union,
|
|
14
12
|
)
|
|
15
13
|
|
|
16
14
|
from typing_extensions import override
|
|
@@ -33,13 +31,13 @@ class BaseLLMOutputParser(ABC, Generic[T]):
|
|
|
33
31
|
|
|
34
32
|
@abstractmethod
|
|
35
33
|
def parse_result(self, result: list[Generation], *, partial: bool = False) -> T:
|
|
36
|
-
"""Parse a list of candidate model
|
|
34
|
+
"""Parse a list of candidate model `Generation` objects into a specific format.
|
|
37
35
|
|
|
38
36
|
Args:
|
|
39
|
-
result: A list of
|
|
40
|
-
to be different candidate outputs for a single model input.
|
|
37
|
+
result: A list of `Generation` to be parsed. The `Generation` objects are
|
|
38
|
+
assumed to be different candidate outputs for a single model input.
|
|
41
39
|
partial: Whether to parse the output as a partial result. This is useful
|
|
42
|
-
for parsers that can parse partial results.
|
|
40
|
+
for parsers that can parse partial results.
|
|
43
41
|
|
|
44
42
|
Returns:
|
|
45
43
|
Structured output.
|
|
@@ -48,17 +46,17 @@ class BaseLLMOutputParser(ABC, Generic[T]):
|
|
|
48
46
|
async def aparse_result(
|
|
49
47
|
self, result: list[Generation], *, partial: bool = False
|
|
50
48
|
) -> T:
|
|
51
|
-
"""Async parse a list of candidate model
|
|
49
|
+
"""Async parse a list of candidate model `Generation` objects into a specific format.
|
|
52
50
|
|
|
53
51
|
Args:
|
|
54
|
-
result: A list of
|
|
52
|
+
result: A list of `Generation` to be parsed. The Generations are assumed
|
|
55
53
|
to be different candidate outputs for a single model input.
|
|
56
54
|
partial: Whether to parse the output as a partial result. This is useful
|
|
57
|
-
for parsers that can parse partial results.
|
|
55
|
+
for parsers that can parse partial results.
|
|
58
56
|
|
|
59
57
|
Returns:
|
|
60
58
|
Structured output.
|
|
61
|
-
"""
|
|
59
|
+
""" # noqa: E501
|
|
62
60
|
return await run_in_executor(None, self.parse_result, result, partial=partial)
|
|
63
61
|
|
|
64
62
|
|
|
@@ -71,7 +69,7 @@ class BaseGenerationOutputParser(
|
|
|
71
69
|
@override
|
|
72
70
|
def InputType(self) -> Any:
|
|
73
71
|
"""Return the input type for the parser."""
|
|
74
|
-
return
|
|
72
|
+
return str | AnyMessage
|
|
75
73
|
|
|
76
74
|
@property
|
|
77
75
|
@override
|
|
@@ -84,8 +82,8 @@ class BaseGenerationOutputParser(
|
|
|
84
82
|
@override
|
|
85
83
|
def invoke(
|
|
86
84
|
self,
|
|
87
|
-
input:
|
|
88
|
-
config:
|
|
85
|
+
input: str | BaseMessage,
|
|
86
|
+
config: RunnableConfig | None = None,
|
|
89
87
|
**kwargs: Any,
|
|
90
88
|
) -> T:
|
|
91
89
|
if isinstance(input, BaseMessage):
|
|
@@ -107,9 +105,9 @@ class BaseGenerationOutputParser(
|
|
|
107
105
|
@override
|
|
108
106
|
async def ainvoke(
|
|
109
107
|
self,
|
|
110
|
-
input:
|
|
111
|
-
config:
|
|
112
|
-
**kwargs:
|
|
108
|
+
input: str | BaseMessage,
|
|
109
|
+
config: RunnableConfig | None = None,
|
|
110
|
+
**kwargs: Any | None,
|
|
113
111
|
) -> T:
|
|
114
112
|
if isinstance(input, BaseMessage):
|
|
115
113
|
return await self._acall_with_config(
|
|
@@ -136,36 +134,38 @@ class BaseOutputParser(
|
|
|
136
134
|
Output parsers help structure language model responses.
|
|
137
135
|
|
|
138
136
|
Example:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
137
|
+
```python
|
|
138
|
+
# Implement a simple boolean output parser
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class BooleanOutputParser(BaseOutputParser[bool]):
|
|
142
|
+
true_val: str = "YES"
|
|
143
|
+
false_val: str = "NO"
|
|
144
|
+
|
|
145
|
+
def parse(self, text: str) -> bool:
|
|
146
|
+
cleaned_text = text.strip().upper()
|
|
147
|
+
if cleaned_text not in (
|
|
148
|
+
self.true_val.upper(),
|
|
149
|
+
self.false_val.upper(),
|
|
150
|
+
):
|
|
151
|
+
raise OutputParserException(
|
|
152
|
+
f"BooleanOutputParser expected output value to either be "
|
|
153
|
+
f"{self.true_val} or {self.false_val} (case-insensitive). "
|
|
154
|
+
f"Received {cleaned_text}."
|
|
155
|
+
)
|
|
156
|
+
return cleaned_text == self.true_val.upper()
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def _type(self) -> str:
|
|
160
|
+
return "boolean_output_parser"
|
|
161
|
+
```
|
|
162
162
|
"""
|
|
163
163
|
|
|
164
164
|
@property
|
|
165
165
|
@override
|
|
166
166
|
def InputType(self) -> Any:
|
|
167
167
|
"""Return the input type for the parser."""
|
|
168
|
-
return
|
|
168
|
+
return str | AnyMessage
|
|
169
169
|
|
|
170
170
|
@property
|
|
171
171
|
@override
|
|
@@ -175,7 +175,7 @@ class BaseOutputParser(
|
|
|
175
175
|
This property is inferred from the first type argument of the class.
|
|
176
176
|
|
|
177
177
|
Raises:
|
|
178
|
-
TypeError: If the class doesn't have an inferable OutputType
|
|
178
|
+
TypeError: If the class doesn't have an inferable `OutputType`.
|
|
179
179
|
"""
|
|
180
180
|
for base in self.__class__.mro():
|
|
181
181
|
if hasattr(base, "__pydantic_generic_metadata__"):
|
|
@@ -192,8 +192,8 @@ class BaseOutputParser(
|
|
|
192
192
|
@override
|
|
193
193
|
def invoke(
|
|
194
194
|
self,
|
|
195
|
-
input:
|
|
196
|
-
config:
|
|
195
|
+
input: str | BaseMessage,
|
|
196
|
+
config: RunnableConfig | None = None,
|
|
197
197
|
**kwargs: Any,
|
|
198
198
|
) -> T:
|
|
199
199
|
if isinstance(input, BaseMessage):
|
|
@@ -215,9 +215,9 @@ class BaseOutputParser(
|
|
|
215
215
|
@override
|
|
216
216
|
async def ainvoke(
|
|
217
217
|
self,
|
|
218
|
-
input:
|
|
219
|
-
config:
|
|
220
|
-
**kwargs:
|
|
218
|
+
input: str | BaseMessage,
|
|
219
|
+
config: RunnableConfig | None = None,
|
|
220
|
+
**kwargs: Any | None,
|
|
221
221
|
) -> T:
|
|
222
222
|
if isinstance(input, BaseMessage):
|
|
223
223
|
return await self._acall_with_config(
|
|
@@ -237,16 +237,16 @@ class BaseOutputParser(
|
|
|
237
237
|
|
|
238
238
|
@override
|
|
239
239
|
def parse_result(self, result: list[Generation], *, partial: bool = False) -> T:
|
|
240
|
-
"""Parse a list of candidate model
|
|
240
|
+
"""Parse a list of candidate model `Generation` objects into a specific format.
|
|
241
241
|
|
|
242
|
-
The return value is parsed from only the first Generation in the result, which
|
|
243
|
-
is assumed to be the highest-likelihood Generation
|
|
242
|
+
The return value is parsed from only the first `Generation` in the result, which
|
|
243
|
+
is assumed to be the highest-likelihood `Generation`.
|
|
244
244
|
|
|
245
245
|
Args:
|
|
246
|
-
result: A list of
|
|
247
|
-
to be different candidate outputs for a single model input.
|
|
246
|
+
result: A list of `Generation` to be parsed. The `Generation` objects are
|
|
247
|
+
assumed to be different candidate outputs for a single model input.
|
|
248
248
|
partial: Whether to parse the output as a partial result. This is useful
|
|
249
|
-
for parsers that can parse partial results.
|
|
249
|
+
for parsers that can parse partial results.
|
|
250
250
|
|
|
251
251
|
Returns:
|
|
252
252
|
Structured output.
|
|
@@ -267,20 +267,20 @@ class BaseOutputParser(
|
|
|
267
267
|
async def aparse_result(
|
|
268
268
|
self, result: list[Generation], *, partial: bool = False
|
|
269
269
|
) -> T:
|
|
270
|
-
"""Async parse a list of candidate model
|
|
270
|
+
"""Async parse a list of candidate model `Generation` objects into a specific format.
|
|
271
271
|
|
|
272
|
-
The return value is parsed from only the first Generation in the result, which
|
|
273
|
-
is assumed to be the highest-likelihood Generation
|
|
272
|
+
The return value is parsed from only the first `Generation` in the result, which
|
|
273
|
+
is assumed to be the highest-likelihood `Generation`.
|
|
274
274
|
|
|
275
275
|
Args:
|
|
276
|
-
result: A list of
|
|
277
|
-
to be different candidate outputs for a single model input.
|
|
276
|
+
result: A list of `Generation` to be parsed. The `Generation` objects are
|
|
277
|
+
assumed to be different candidate outputs for a single model input.
|
|
278
278
|
partial: Whether to parse the output as a partial result. This is useful
|
|
279
|
-
for parsers that can parse partial results.
|
|
279
|
+
for parsers that can parse partial results.
|
|
280
280
|
|
|
281
281
|
Returns:
|
|
282
282
|
Structured output.
|
|
283
|
-
"""
|
|
283
|
+
""" # noqa: E501
|
|
284
284
|
return await run_in_executor(None, self.parse_result, result, partial=partial)
|
|
285
285
|
|
|
286
286
|
async def aparse(self, text: str) -> T:
|
|
@@ -302,13 +302,13 @@ class BaseOutputParser(
|
|
|
302
302
|
) -> Any:
|
|
303
303
|
"""Parse the output of an LLM call with the input prompt for context.
|
|
304
304
|
|
|
305
|
-
The prompt is largely provided in the event the OutputParser wants
|
|
305
|
+
The prompt is largely provided in the event the `OutputParser` wants
|
|
306
306
|
to retry or fix the output in some way, and needs information from
|
|
307
307
|
the prompt to do so.
|
|
308
308
|
|
|
309
309
|
Args:
|
|
310
310
|
completion: String output of a language model.
|
|
311
|
-
prompt: Input PromptValue
|
|
311
|
+
prompt: Input `PromptValue`.
|
|
312
312
|
|
|
313
313
|
Returns:
|
|
314
314
|
Structured output.
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
"""Format instructions."""
|
|
2
2
|
|
|
3
|
-
JSON_FORMAT_INSTRUCTIONS = """
|
|
3
|
+
JSON_FORMAT_INSTRUCTIONS = """STRICT OUTPUT FORMAT:
|
|
4
|
+
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
|
|
5
|
+
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
|
|
6
|
+
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
|
|
7
|
+
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.
|
|
4
8
|
|
|
5
|
-
|
|
6
|
-
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
|
|
9
|
+
The output should be formatted as a JSON instance that conforms to the JSON schema below.
|
|
7
10
|
|
|
8
|
-
|
|
11
|
+
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
|
|
12
|
+
|
|
13
|
+
Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output):
|
|
9
14
|
```
|
|
10
15
|
{schema}
|
|
11
16
|
```""" # noqa: E501
|
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
from json import JSONDecodeError
|
|
7
|
-
from typing import Annotated, Any,
|
|
7
|
+
from typing import Annotated, Any, TypeVar
|
|
8
8
|
|
|
9
9
|
import jsonpatch # type: ignore[import-untyped]
|
|
10
10
|
import pydantic
|
|
@@ -23,7 +23,7 @@ from langchain_core.utils.json import (
|
|
|
23
23
|
)
|
|
24
24
|
|
|
25
25
|
# Union type needs to be last assignment to PydanticBaseModel to make mypy happy.
|
|
26
|
-
PydanticBaseModel =
|
|
26
|
+
PydanticBaseModel = BaseModel | pydantic.BaseModel
|
|
27
27
|
|
|
28
28
|
TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
|
|
29
29
|
|
|
@@ -31,19 +31,22 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
|
|
|
31
31
|
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
|
32
32
|
"""Parse the output of an LLM call to a JSON object.
|
|
33
33
|
|
|
34
|
+
Probably the most reliable output parser for getting structured data that does *not*
|
|
35
|
+
use function calling.
|
|
36
|
+
|
|
34
37
|
When used in streaming mode, it will yield partial JSON objects containing
|
|
35
38
|
all the keys that have been returned so far.
|
|
36
39
|
|
|
37
|
-
In streaming, if `diff` is set to `True`, yields JSONPatch operations
|
|
38
|
-
|
|
40
|
+
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
|
|
41
|
+
difference between the previous and the current object.
|
|
39
42
|
"""
|
|
40
43
|
|
|
41
|
-
pydantic_object: Annotated[
|
|
44
|
+
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
|
|
42
45
|
"""The Pydantic object to use for validation.
|
|
43
|
-
If None
|
|
46
|
+
If `None`, no validation is performed."""
|
|
44
47
|
|
|
45
48
|
@override
|
|
46
|
-
def _diff(self, prev:
|
|
49
|
+
def _diff(self, prev: Any | None, next: Any) -> Any:
|
|
47
50
|
return jsonpatch.make_patch(prev, next).patch
|
|
48
51
|
|
|
49
52
|
@staticmethod
|
|
@@ -59,10 +62,9 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
|
|
59
62
|
Args:
|
|
60
63
|
result: The result of the LLM call.
|
|
61
64
|
partial: Whether to parse partial JSON objects.
|
|
62
|
-
If True
|
|
65
|
+
If `True`, the output will be a JSON object containing
|
|
63
66
|
all the keys that have been returned so far.
|
|
64
|
-
If False
|
|
65
|
-
Default is False.
|
|
67
|
+
If `False`, the output will be the full JSON object.
|
|
66
68
|
|
|
67
69
|
Returns:
|
|
68
70
|
The parsed JSON object.
|
|
@@ -7,7 +7,7 @@ import re
|
|
|
7
7
|
from abc import abstractmethod
|
|
8
8
|
from collections import deque
|
|
9
9
|
from io import StringIO
|
|
10
|
-
from typing import TYPE_CHECKING, TypeVar
|
|
10
|
+
from typing import TYPE_CHECKING, TypeVar
|
|
11
11
|
|
|
12
12
|
from typing_extensions import override
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ def droplastn(
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
|
44
|
-
"""Parse the output of
|
|
44
|
+
"""Parse the output of a model to a list."""
|
|
45
45
|
|
|
46
46
|
@property
|
|
47
47
|
def _type(self) -> str:
|
|
@@ -70,88 +70,86 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
|
|
70
70
|
raise NotImplementedError
|
|
71
71
|
|
|
72
72
|
@override
|
|
73
|
-
def _transform(
|
|
74
|
-
self, input: Iterator[Union[str, BaseMessage]]
|
|
75
|
-
) -> Iterator[list[str]]:
|
|
73
|
+
def _transform(self, input: Iterator[str | BaseMessage]) -> Iterator[list[str]]:
|
|
76
74
|
buffer = ""
|
|
77
75
|
for chunk in input:
|
|
78
76
|
if isinstance(chunk, BaseMessage):
|
|
79
|
-
#
|
|
77
|
+
# Extract text
|
|
80
78
|
chunk_content = chunk.content
|
|
81
79
|
if not isinstance(chunk_content, str):
|
|
82
80
|
continue
|
|
83
81
|
buffer += chunk_content
|
|
84
82
|
else:
|
|
85
|
-
#
|
|
83
|
+
# Add current chunk to buffer
|
|
86
84
|
buffer += chunk
|
|
87
|
-
#
|
|
85
|
+
# Parse buffer into a list of parts
|
|
88
86
|
try:
|
|
89
87
|
done_idx = 0
|
|
90
|
-
#
|
|
88
|
+
# Yield only complete parts
|
|
91
89
|
for m in droplastn(self.parse_iter(buffer), 1):
|
|
92
90
|
done_idx = m.end()
|
|
93
91
|
yield [m.group(1)]
|
|
94
92
|
buffer = buffer[done_idx:]
|
|
95
93
|
except NotImplementedError:
|
|
96
94
|
parts = self.parse(buffer)
|
|
97
|
-
#
|
|
95
|
+
# Yield only complete parts
|
|
98
96
|
if len(parts) > 1:
|
|
99
97
|
for part in parts[:-1]:
|
|
100
98
|
yield [part]
|
|
101
99
|
buffer = parts[-1]
|
|
102
|
-
#
|
|
100
|
+
# Yield the last part
|
|
103
101
|
for part in self.parse(buffer):
|
|
104
102
|
yield [part]
|
|
105
103
|
|
|
106
104
|
@override
|
|
107
105
|
async def _atransform(
|
|
108
|
-
self, input: AsyncIterator[
|
|
106
|
+
self, input: AsyncIterator[str | BaseMessage]
|
|
109
107
|
) -> AsyncIterator[list[str]]:
|
|
110
108
|
buffer = ""
|
|
111
109
|
async for chunk in input:
|
|
112
110
|
if isinstance(chunk, BaseMessage):
|
|
113
|
-
#
|
|
111
|
+
# Extract text
|
|
114
112
|
chunk_content = chunk.content
|
|
115
113
|
if not isinstance(chunk_content, str):
|
|
116
114
|
continue
|
|
117
115
|
buffer += chunk_content
|
|
118
116
|
else:
|
|
119
|
-
#
|
|
117
|
+
# Add current chunk to buffer
|
|
120
118
|
buffer += chunk
|
|
121
|
-
#
|
|
119
|
+
# Parse buffer into a list of parts
|
|
122
120
|
try:
|
|
123
121
|
done_idx = 0
|
|
124
|
-
#
|
|
122
|
+
# Yield only complete parts
|
|
125
123
|
for m in droplastn(self.parse_iter(buffer), 1):
|
|
126
124
|
done_idx = m.end()
|
|
127
125
|
yield [m.group(1)]
|
|
128
126
|
buffer = buffer[done_idx:]
|
|
129
127
|
except NotImplementedError:
|
|
130
128
|
parts = self.parse(buffer)
|
|
131
|
-
#
|
|
129
|
+
# Yield only complete parts
|
|
132
130
|
if len(parts) > 1:
|
|
133
131
|
for part in parts[:-1]:
|
|
134
132
|
yield [part]
|
|
135
133
|
buffer = parts[-1]
|
|
136
|
-
#
|
|
134
|
+
# Yield the last part
|
|
137
135
|
for part in self.parse(buffer):
|
|
138
136
|
yield [part]
|
|
139
137
|
|
|
140
138
|
|
|
141
139
|
class CommaSeparatedListOutputParser(ListOutputParser):
|
|
142
|
-
"""Parse the output of
|
|
140
|
+
"""Parse the output of a model to a comma-separated list."""
|
|
143
141
|
|
|
144
142
|
@classmethod
|
|
145
143
|
def is_lc_serializable(cls) -> bool:
|
|
146
|
-
"""Return True as this class is serializable."""
|
|
144
|
+
"""Return `True` as this class is serializable."""
|
|
147
145
|
return True
|
|
148
146
|
|
|
149
147
|
@classmethod
|
|
150
148
|
def get_lc_namespace(cls) -> list[str]:
|
|
151
|
-
"""Get the namespace of the
|
|
149
|
+
"""Get the namespace of the LangChain object.
|
|
152
150
|
|
|
153
151
|
Returns:
|
|
154
|
-
|
|
152
|
+
`["langchain", "output_parsers", "list"]`
|
|
155
153
|
"""
|
|
156
154
|
return ["langchain", "output_parsers", "list"]
|
|
157
155
|
|
|
@@ -179,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
|
|
|
179
177
|
)
|
|
180
178
|
return [item for sublist in reader for item in sublist]
|
|
181
179
|
except csv.Error:
|
|
182
|
-
#
|
|
180
|
+
# Keep old logic for backup
|
|
183
181
|
return [part.strip() for part in text.split(",")]
|
|
184
182
|
|
|
185
183
|
@property
|