langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +45 -70
- langchain_core/_api/deprecation.py +80 -80
- langchain_core/_api/path.py +22 -8
- langchain_core/_import_utils.py +10 -4
- langchain_core/agents.py +25 -21
- langchain_core/caches.py +53 -63
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +341 -348
- langchain_core/callbacks/file.py +55 -44
- langchain_core/callbacks/manager.py +546 -683
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +35 -36
- langchain_core/callbacks/usage.py +65 -70
- langchain_core/chat_history.py +48 -55
- langchain_core/document_loaders/base.py +46 -21
- langchain_core/document_loaders/langsmith.py +39 -36
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +96 -74
- langchain_core/documents/compressor.py +12 -9
- langchain_core/documents/transformers.py +29 -28
- langchain_core/embeddings/fake.py +56 -57
- langchain_core/env.py +2 -3
- langchain_core/example_selectors/base.py +12 -0
- langchain_core/example_selectors/length_based.py +1 -1
- langchain_core/example_selectors/semantic_similarity.py +21 -25
- langchain_core/exceptions.py +15 -9
- langchain_core/globals.py +4 -163
- langchain_core/indexing/api.py +132 -125
- langchain_core/indexing/base.py +64 -67
- langchain_core/indexing/in_memory.py +26 -6
- langchain_core/language_models/__init__.py +15 -27
- langchain_core/language_models/_utils.py +267 -117
- langchain_core/language_models/base.py +92 -177
- langchain_core/language_models/chat_models.py +547 -407
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +72 -118
- langchain_core/language_models/llms.py +168 -242
- langchain_core/load/dump.py +8 -11
- langchain_core/load/load.py +32 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +50 -56
- langchain_core/messages/__init__.py +36 -51
- langchain_core/messages/ai.py +377 -150
- langchain_core/messages/base.py +239 -47
- langchain_core/messages/block_translators/__init__.py +111 -0
- langchain_core/messages/block_translators/anthropic.py +470 -0
- langchain_core/messages/block_translators/bedrock.py +94 -0
- langchain_core/messages/block_translators/bedrock_converse.py +297 -0
- langchain_core/messages/block_translators/google_genai.py +530 -0
- langchain_core/messages/block_translators/google_vertexai.py +21 -0
- langchain_core/messages/block_translators/groq.py +143 -0
- langchain_core/messages/block_translators/langchain_v0.py +301 -0
- langchain_core/messages/block_translators/openai.py +1010 -0
- langchain_core/messages/chat.py +2 -3
- langchain_core/messages/content.py +1423 -0
- langchain_core/messages/function.py +7 -7
- langchain_core/messages/human.py +44 -38
- langchain_core/messages/modifier.py +3 -2
- langchain_core/messages/system.py +40 -27
- langchain_core/messages/tool.py +160 -58
- langchain_core/messages/utils.py +527 -638
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +68 -104
- langchain_core/output_parsers/json.py +13 -17
- langchain_core/output_parsers/list.py +11 -33
- langchain_core/output_parsers/openai_functions.py +56 -74
- langchain_core/output_parsers/openai_tools.py +68 -109
- langchain_core/output_parsers/pydantic.py +15 -13
- langchain_core/output_parsers/string.py +6 -2
- langchain_core/output_parsers/transform.py +17 -60
- langchain_core/output_parsers/xml.py +34 -44
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +26 -11
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +17 -6
- langchain_core/outputs/llm_result.py +15 -8
- langchain_core/prompt_values.py +29 -123
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +48 -63
- langchain_core/prompts/chat.py +259 -288
- langchain_core/prompts/dict.py +19 -11
- langchain_core/prompts/few_shot.py +84 -90
- langchain_core/prompts/few_shot_with_templates.py +14 -12
- langchain_core/prompts/image.py +19 -14
- langchain_core/prompts/loading.py +6 -8
- langchain_core/prompts/message.py +7 -8
- langchain_core/prompts/prompt.py +42 -43
- langchain_core/prompts/string.py +37 -16
- langchain_core/prompts/structured.py +43 -46
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +52 -192
- langchain_core/runnables/base.py +1727 -1683
- langchain_core/runnables/branch.py +52 -73
- langchain_core/runnables/config.py +89 -103
- langchain_core/runnables/configurable.py +128 -130
- langchain_core/runnables/fallbacks.py +93 -82
- langchain_core/runnables/graph.py +127 -127
- langchain_core/runnables/graph_ascii.py +63 -41
- langchain_core/runnables/graph_mermaid.py +87 -70
- langchain_core/runnables/graph_png.py +31 -36
- langchain_core/runnables/history.py +145 -161
- langchain_core/runnables/passthrough.py +141 -144
- langchain_core/runnables/retry.py +84 -68
- langchain_core/runnables/router.py +33 -37
- langchain_core/runnables/schema.py +79 -72
- langchain_core/runnables/utils.py +95 -139
- langchain_core/stores.py +85 -131
- langchain_core/structured_query.py +11 -15
- langchain_core/sys_info.py +31 -32
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +221 -247
- langchain_core/tools/convert.py +144 -161
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +12 -19
- langchain_core/tools/simple.py +52 -29
- langchain_core/tools/structured.py +56 -60
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/_streaming.py +6 -7
- langchain_core/tracers/base.py +103 -112
- langchain_core/tracers/context.py +29 -48
- langchain_core/tracers/core.py +142 -105
- langchain_core/tracers/evaluation.py +30 -34
- langchain_core/tracers/event_stream.py +162 -117
- langchain_core/tracers/langchain.py +34 -36
- langchain_core/tracers/log_stream.py +87 -49
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +18 -34
- langchain_core/tracers/run_collector.py +8 -20
- langchain_core/tracers/schemas.py +0 -125
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +47 -9
- langchain_core/utils/aiter.py +70 -66
- langchain_core/utils/env.py +12 -9
- langchain_core/utils/function_calling.py +139 -206
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +6 -6
- langchain_core/utils/interactive_env.py +6 -2
- langchain_core/utils/iter.py +48 -45
- langchain_core/utils/json.py +14 -4
- langchain_core/utils/json_schema.py +159 -43
- langchain_core/utils/mustache.py +32 -25
- langchain_core/utils/pydantic.py +67 -40
- langchain_core/utils/strings.py +5 -5
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +104 -62
- langchain_core/vectorstores/base.py +131 -179
- langchain_core/vectorstores/in_memory.py +113 -182
- langchain_core/vectorstores/utils.py +23 -17
- langchain_core/version.py +1 -1
- langchain_core-1.0.0.dist-info/METADATA +68 -0
- langchain_core-1.0.0.dist-info/RECORD +172 -0
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
- langchain_core/beta/__init__.py +0 -1
- langchain_core/beta/runnables/__init__.py +0 -1
- langchain_core/beta/runnables/context.py +0 -448
- langchain_core/memory.py +0 -116
- langchain_core/messages/content_blocks.py +0 -1435
- langchain_core/prompts/pipeline.py +0 -133
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -23
- langchain_core/utils/loading.py +0 -31
- langchain_core/v1/__init__.py +0 -1
- langchain_core/v1/chat_models.py +0 -1047
- langchain_core/v1/messages.py +0 -755
- langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
- langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
- langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
langchain_core/utils/interactive_env.py
CHANGED
@@ -1,8 +1,12 @@
 """Utilities for working with interactive environments."""
 
+import sys
+
 
 def is_interactive_env() -> bool:
-    """Determine if running within IPython or Jupyter."""
-    import sys
+    """Determine if running within IPython or Jupyter.
 
+    Returns:
+        True if running in an interactive environment, `False` otherwise.
+    """
     return hasattr(sys, "ps2")
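For orientation, a small sketch of how a caller might consume this helper; the gating use case is illustrative and not part of the diff:

```python
from langchain_core.utils.interactive_env import is_interactive_env


def maybe_print_hint(hint: str) -> None:
    # is_interactive_env() returns True when `sys.ps2` exists, i.e. inside
    # IPython/Jupyter, so hints stay quiet in plain scripts and CI runs.
    if is_interactive_env():
        print(hint)


maybe_print_hint("Running interactively; intermediate outputs will be shown.")
```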
langchain_core/utils/iter.py
CHANGED
@@ -8,14 +8,11 @@ from types import TracebackType
 from typing import (
     Any,
     Generic,
-
+    Literal,
     TypeVar,
-    Union,
     overload,
 )
 
-from typing_extensions import Literal
-
 T = TypeVar("T")
 
 
@@ -27,11 +24,11 @@ class NoLock:
 
     def __exit__(
         self,
-        exc_type:
-        exc_val:
-        exc_tb:
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
     ) -> Literal[False]:
-        """
+        """Return False (exception not suppressed)."""
         return False
 
 
@@ -43,10 +40,10 @@ def tee_peer(
     peers: list[deque[T]],
     lock: AbstractContextManager[Any],
 ) -> Generator[T, None, None]:
-    """An individual iterator of a
+    """An individual iterator of a `.tee`.
 
     This function is a generator that yields items from the shared iterator
-
+    `iterator`. It buffers items until the least advanced iterator has
     yielded them as well. The buffer is shared with all other peers.
 
     Args:
@@ -92,39 +89,39 @@ def tee_peer(
 
 
 class Tee(Generic[T]):
-    """Create
+    """Create `n` separate asynchronous iterators over `iterable`.
 
-    This splits a single
+    This splits a single `iterable` into multiple iterators, each providing
     the same items in the same order.
     All child iterators may advance separately but share the same items
-    from
+    from `iterable` -- when the most advanced iterator retrieves an item,
     it is buffered until the least advanced iterator has yielded it as well.
-    A
+    A `tee` works lazily and can handle an infinite `iterable`, provided
     that all iterators advance.
 
-
-
-
-
-
-
-
-    Unlike
-    of a :py
-    to get the child iterators. In addition, its
-    immediately closes all children, and it can be used in an
+    ```python
+    async def derivative(sensor_data):
+        previous, current = a.tee(sensor_data, n=2)
+        await a.anext(previous)  # advance one iterator
+        return a.map(operator.sub, previous, current)
+    ```
+
+    Unlike `itertools.tee`, `.tee` returns a custom type instead
+    of a :py`tuple`. Like a tuple, it can be indexed, iterated and unpacked
+    to get the child iterators. In addition, its `.tee.aclose` method
+    immediately closes all children, and it can be used in an `async with` context
     for the same effect.
 
-    If
-    provide these items. Also,
+    If `iterable` is an iterator and read elsewhere, `tee` will *not*
+    provide these items. Also, `tee` must internally buffer each item until the
     last iterator has yielded it; if the most and least advanced iterator differ
-    by most data, using a :py
+    by most data, using a :py`list` is more efficient (but not lazy).
 
-    If the underlying iterable is concurrency safe (
+    If the underlying iterable is concurrency safe (`anext` may be awaited
     concurrently) the resulting iterators are concurrency safe as well. Otherwise,
     the iterators are safe if there is only ever one single "most advanced" iterator.
-    To enforce sequential use of
-    - e.g. an :py
+    To enforce sequential use of `anext`, provide a `lock`
+    - e.g. an :py`asyncio.Lock` instance in an :py:mod:`asyncio` application -
     and access is automatically synchronised.
 
     """
@@ -134,15 +131,15 @@ class Tee(Generic[T]):
         iterable: Iterator[T],
         n: int = 2,
         *,
-        lock:
+        lock: AbstractContextManager[Any] | None = None,
     ):
-        """Create a
+        """Create a `tee`.
 
         Args:
             iterable: The iterable to split.
-            n: The number of iterators to create.
+            n: The number of iterators to create.
             lock: The lock to synchronise access to the shared buffers.
-
+
         """
         self._iterator = iter(iterable)
         self._buffers: list[deque[T]] = [deque() for _ in range(n)]
@@ -166,14 +163,16 @@ class Tee(Generic[T]):
     @overload
     def __getitem__(self, item: slice) -> tuple[Iterator[T], ...]: ...
 
-    def __getitem__(
-        self, item: Union[int, slice]
-    ) -> Union[Iterator[T], tuple[Iterator[T], ...]]:
+    def __getitem__(self, item: int | slice) -> Iterator[T] | tuple[Iterator[T], ...]:
         """Return the child iterator(s) at the given index or slice."""
         return self._children[item]
 
     def __iter__(self) -> Iterator[Iterator[T]]:
-        """Return an iterator over the child iterators.
+        """Return an iterator over the child iterators.
+
+        Yields:
+            The child iterators.
+        """
         yield from self._children
 
     def __enter__(self) -> "Tee[T]":
@@ -182,11 +181,15 @@ class Tee(Generic[T]):
 
     def __exit__(
         self,
-        exc_type:
-        exc_val:
-        exc_tb:
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
     ) -> Literal[False]:
-        """Close all child iterators.
+        """Close all child iterators.
+
+        Returns:
+            False (exception not suppressed).
+        """
         self.close()
         return False
 
@@ -200,11 +203,11 @@ class Tee(Generic[T]):
 safetee = Tee
 
 
-def batch_iterate(size:
+def batch_iterate(size: int | None, iterable: Iterable[T]) -> Iterator[list[T]]:
     """Utility batching function.
 
     Args:
-        size: The size of the batch. If None
+        size: The size of the batch. If `None`, returns a single batch.
         iterable: The iterable to batch.
 
     Yields:
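To make the behaviour documented above concrete, here is a minimal sketch using the two public helpers from `langchain_core.utils.iter`; the sample data is invented and the expected results follow the new docstrings:

```python
from langchain_core.utils.iter import Tee, batch_iterate

# batch_iterate: with an integer size it yields lists of at most `size` items;
# with size=None the whole iterable comes back as a single batch.
assert list(batch_iterate(2, [1, 2, 3, 4, 5])) == [[1, 2], [3, 4], [5]]
assert list(batch_iterate(None, [1, 2, 3])) == [[1, 2, 3]]

# Tee: split one iterator into n children that replay the same items.
# Items are buffered until the least advanced child has consumed them.
first, second = Tee(iter(range(3)), n=2)
assert list(first) == [0, 1, 2]
assert list(second) == [0, 1, 2]
```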
langchain_core/utils/json.py
CHANGED
@@ -4,7 +4,8 @@ from __future__ import annotations
 
 import json
 import re
-from
+from collections.abc import Callable
+from typing import Any
 
 from langchain_core.exceptions import OutputParserException
 
@@ -19,13 +20,16 @@ def _replace_new_line(match: re.Match[str]) -> str:
     return match.group(1) + value + match.group(3)
 
 
-def _custom_parser(multiline_string: str) -> str:
+def _custom_parser(multiline_string: str | bytes | bytearray) -> str:
     r"""Custom parser for multiline strings.
 
     The LLM response for `action_input` may be a multiline
     string containing unescaped newlines, tabs or quotes. This function
     replaces those characters with their escaped counterparts.
     (newlines in JSON must be double-escaped: `\\n`).
+
+    Returns:
+        The modified string with escaped newlines, tabs and quotes.
     """
     if isinstance(multiline_string, (bytes, bytearray)):
         multiline_string = multiline_string.decode()
@@ -47,7 +51,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
 
     Args:
         s: The JSON string to parse.
-        strict: Whether to use strict parsing.
+        strict: Whether to use strict parsing.
 
     Returns:
         The parsed JSON object as a Python dictionary.
@@ -98,7 +102,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
     # If we're still inside a string at the end of processing,
     # we need to close the string.
     if is_inside_string:
-        if escaped:  #
+        if escaped:  # Remove unterminated escape character
             new_chars.pop()
         new_chars.append('"')
 
@@ -187,6 +191,12 @@ def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict:
     except json.JSONDecodeError as e:
         msg = f"Got invalid JSON object. Error: {e}"
         raise OutputParserException(msg) from e
+    if not isinstance(json_obj, dict):
+        error_message = (
+            f"Expected JSON object (dict), but got: {type(json_obj).__name__}. "
+        )
+        raise OutputParserException(error_message, llm_output=text)
+
     for key in expected_keys:
         if key not in json_obj:
             msg = (
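A short sketch of the two parsing helpers touched above; the inputs are invented, and the last call exercises the new check that rejects fenced JSON whose top level is not an object:

```python
from langchain_core.exceptions import OutputParserException
from langchain_core.utils.json import parse_and_check_json_markdown, parse_partial_json

# parse_partial_json closes unterminated strings/brackets in truncated output,
# which is useful while an LLM response is still streaming.
assert parse_partial_json('{"action": "search", "args": ["lang') == {
    "action": "search",
    "args": ["lang"],
}

# parse_and_check_json_markdown extracts JSON from a ```json fence and
# verifies that the expected keys are present.
text = '```json\n{"action": "search", "args": []}\n```'
assert parse_and_check_json_markdown(text, ["action"])["action"] == "search"

# New in 1.0.0: a top-level array now raises immediately instead of failing later.
try:
    parse_and_check_json_markdown("```json\n[1, 2, 3]\n```", [])
except OutputParserException as exc:
    print(exc)  # e.g. "Expected JSON object (dict), but got: list."
```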
langchain_core/utils/json_schema.py
CHANGED
@@ -3,13 +3,13 @@
 from __future__ import annotations
 
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
 
 
-def _retrieve_ref(path: str, schema: dict) -> dict:
+def _retrieve_ref(path: str, schema: dict) -> list | dict:
     components = path.split("/")
     if components[0] != "#":
         msg = (
@@ -17,9 +17,12 @@ def _retrieve_ref(path: str, schema: dict) -> dict:
             "with #."
         )
         raise ValueError(msg)
-    out = schema
+    out: list | dict = schema
     for component in components[1:]:
         if component in out:
+            if isinstance(out, list):
+                msg = f"Reference '{path}' not found."
+                raise KeyError(msg)
             out = out[component]
         elif component.isdigit():
             index = int(component)
@@ -36,58 +39,119 @@ def _retrieve_ref(path: str, schema: dict) -> dict:
     return deepcopy(out)
 
 
+def _process_dict_properties(
+    properties: dict[str, Any],
+    full_schema: dict[str, Any],
+    processed_refs: set[str],
+    skip_keys: Sequence[str],
+    *,
+    shallow_refs: bool,
+) -> dict[str, Any]:
+    """Process dictionary properties, recursing into nested structures."""
+    result: dict[str, Any] = {}
+    for key, value in properties.items():
+        if key in skip_keys:
+            # Skip recursion for specified keys, just copy the value as-is
+            result[key] = deepcopy(value)
+        elif isinstance(value, (dict, list)):
+            # Recursively process nested objects and arrays
+            result[key] = _dereference_refs_helper(
+                value, full_schema, processed_refs, skip_keys, shallow_refs
+            )
+        else:
+            # Copy primitive values directly
+            result[key] = value
+    return result
+
+
 def _dereference_refs_helper(
     obj: Any,
     full_schema: dict[str, Any],
-    processed_refs:
+    processed_refs: set[str] | None,
     skip_keys: Sequence[str],
     shallow_refs: bool,  # noqa: FBT001
 ) -> Any:
-    """
+    """Dereference JSON Schema $ref objects, handling both pure and mixed references.
 
-
-
-    - if shallow_refs=False: deep-inline all nested refs
+    This function processes JSON Schema objects containing $ref properties by resolving
+    the references and merging any additional properties. It handles:
 
-
+    - Pure $ref objects: {"$ref": "#/path/to/definition"}
+    - Mixed $ref objects: {"$ref": "#/path", "title": "Custom Title", ...}
+    - Circular references by breaking cycles and preserving non-ref properties
+
+    Args:
+        obj: The object to process (can be dict, list, or primitive)
+        full_schema: The complete schema containing all definitions
+        processed_refs: Set tracking currently processing refs (for cycle detection)
+        skip_keys: Keys under which to skip recursion
+        shallow_refs: If `True`, only break cycles; if False, deep-inline all refs
+
+    Returns:
+        The object with $ref properties resolved and merged with other properties.
     """
     if processed_refs is None:
         processed_refs = set()
 
-    # 1
-    if isinstance(obj, dict) and "$ref" in
+    # Case 1: Object contains a $ref property (pure or mixed with additional properties)
+    if isinstance(obj, dict) and "$ref" in obj:
         ref_path = obj["$ref"]
-
+        additional_properties = {
+            key: value for key, value in obj.items() if key != "$ref"
+        }
+
+        # Detect circular reference: if we're already processing this $ref,
+        # return only the additional properties to break the cycle
         if ref_path in processed_refs:
-            return
-
+            return _process_dict_properties(
+                additional_properties,
+                full_schema,
+                processed_refs,
+                skip_keys,
+                shallow_refs=shallow_refs,
+            )
 
-        #
-
+        # Mark this reference as being processed (for cycle detection)
+        processed_refs.add(ref_path)
 
-        #
-
-
+        # Fetch and recursively resolve the referenced object
+        referenced_object = deepcopy(_retrieve_ref(ref_path, full_schema))
+        resolved_reference = _dereference_refs_helper(
+            referenced_object, full_schema, processed_refs, skip_keys, shallow_refs
         )
 
+        # Clean up: remove from processing set before returning
         processed_refs.remove(ref_path)
-        return result
 
-
+        # Pure $ref case: no additional properties, return resolved reference directly
+        if not additional_properties:
+            return resolved_reference
+
+        # Mixed $ref case: merge resolved reference with additional properties
+        # Additional properties take precedence over resolved properties
+        merged_result = {}
+        if isinstance(resolved_reference, dict):
+            merged_result.update(resolved_reference)
+
+        # Process additional properties and merge them (they override resolved ones)
+        processed_additional = _process_dict_properties(
+            additional_properties,
+            full_schema,
+            processed_refs,
+            skip_keys,
+            shallow_refs=shallow_refs,
+        )
+        merged_result.update(processed_additional)
+
+        return merged_result
+
+    # Case 2: Regular dictionary without $ref - process all properties
     if isinstance(obj, dict):
-
-
-
-                # do not recurse under this key
-                out[k] = deepcopy(v)
-            elif isinstance(v, (dict, list)):
-                out[k] = _dereference_refs_helper(
-                    v, full_schema, processed_refs, skip_keys, shallow_refs
-                )
-            else:
-                out[k] = v
-        return out
+        return _process_dict_properties(
+            obj, full_schema, processed_refs, skip_keys, shallow_refs=shallow_refs
+        )
 
+    # Case 3: List - recursively process each item
     if isinstance(obj, list):
         return [
             _dereference_refs_helper(
@@ -96,25 +160,77 @@ def _dereference_refs_helper(
             for item in obj
         ]
 
+    # Case 4: Primitive value (string, number, boolean, null) - return unchanged
     return obj
 
 
 def dereference_refs(
     schema_obj: dict,
     *,
-    full_schema:
-    skip_keys:
+    full_schema: dict | None = None,
+    skip_keys: Sequence[str] | None = None,
 ) -> dict:
-    """
+    """Resolve and inline JSON Schema $ref references in a schema object.
+
+    This function processes a JSON Schema and resolves all $ref references by replacing
+    them with the actual referenced content. It handles both simple references and
+    complex cases like circular references and mixed $ref objects that contain
+    additional properties alongside the $ref.
 
     Args:
-
-
-
-
-
-
-
+        schema_obj: The JSON Schema object or fragment to process. This can be a
+            complete schema or just a portion of one.
+        full_schema: The complete schema containing all definitions that $refs might
+            point to. If not provided, defaults to schema_obj (useful when the
+            schema is self-contained).
+        skip_keys: Controls recursion behavior and reference resolution depth:
+            - If `None` (Default): Only recurse under '$defs' and use shallow reference
+              resolution (break cycles but don't deep-inline nested refs)
+            - If provided (even as []): Recurse under all keys and use deep reference
+              resolution (fully inline all nested references)
+
+    Returns:
+        A new dictionary with all $ref references resolved and inlined. The original
+        schema_obj is not modified.
+
+    Examples:
+        Basic reference resolution:
+        >>> schema = {
+        ...     "type": "object",
+        ...     "properties": {"name": {"$ref": "#/$defs/string_type"}},
+        ...     "$defs": {"string_type": {"type": "string"}},
+        ... }
+        >>> result = dereference_refs(schema)
+        >>> result["properties"]["name"]  # {"type": "string"}
+
+        Mixed $ref with additional properties:
+        >>> schema = {
+        ...     "properties": {
+        ...         "name": {"$ref": "#/$defs/base", "description": "User name"}
+        ...     },
+        ...     "$defs": {"base": {"type": "string", "minLength": 1}},
+        ... }
+        >>> result = dereference_refs(schema)
+        >>> result["properties"]["name"]
+        # {"type": "string", "minLength": 1, "description": "User name"}
+
+        Handling circular references:
+        >>> schema = {
+        ...     "properties": {"user": {"$ref": "#/$defs/User"}},
+        ...     "$defs": {
+        ...         "User": {
+        ...             "type": "object",
+        ...             "properties": {"friend": {"$ref": "#/$defs/User"}},
+        ...         }
+        ...     },
+        ... }
+        >>> result = dereference_refs(schema)  # Won't cause infinite recursion
+
+    Note:
+        - Circular references are handled gracefully by breaking cycles
+        - Mixed $ref objects (with both $ref and other properties) are supported
+        - Additional properties in mixed $refs override resolved properties
+        - The $defs section is preserved in the output by default
     """
     full = full_schema or schema_obj
     keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"]
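The mixed-reference rules described in the new docstring can be exercised directly; this is a small sketch reusing, in abbreviated form, the docstring's own example schema:

```python
from langchain_core.utils.json_schema import dereference_refs

schema = {
    "properties": {
        # A $ref mixed with an extra property: the target is inlined and the
        # extra "description" key is kept, overriding any resolved duplicate.
        "name": {"$ref": "#/$defs/base", "description": "User name"}
    },
    "$defs": {"base": {"type": "string", "minLength": 1}},
}

resolved = dereference_refs(schema)
assert resolved["properties"]["name"] == {
    "type": "string",
    "minLength": 1,
    "description": "User name",
}
assert "$defs" in resolved  # the $defs section is preserved by default
```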
langchain_core/utils/mustache.py
CHANGED
@@ -12,18 +12,16 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Literal,
-    Optional,
-    Union,
     cast,
 )
 
 if TYPE_CHECKING:
-    from
+    from typing import TypeAlias
 
 logger = logging.getLogger(__name__)
 
 
-Scopes: TypeAlias = list[
+Scopes: TypeAlias = list[Literal[False, 0] | Mapping[str, Any]]
 
 
 # Globals
@@ -48,7 +46,7 @@ def grab_literal(template: str, l_del: str) -> tuple[str, str]:
         l_del: The left delimiter.
 
     Returns:
-
+        The literal and the template.
     """
     global _CURRENT_LINE
 
@@ -78,11 +76,11 @@ def l_sa_check(
         is_standalone: Whether the tag is standalone.
 
     Returns:
-
+        Whether the tag could be a standalone.
     """
     # If there is a newline, or the previous tag was a standalone
    if literal.find("\n") != -1 or is_standalone:
-        padding = literal.
+        padding = literal.rsplit("\n", maxsplit=1)[-1]
 
         # If all the characters since the last newline are spaces
         # Then the next tag could be a standalone
@@ -104,7 +102,7 @@ def r_sa_check(
         is_standalone: Whether the tag is standalone.
 
     Returns:
-
+        Whether the tag could be a standalone.
     """
     # Check right side if we might be a standalone
     if is_standalone and tag_type not in {"variable", "no escape"}:
@@ -126,7 +124,7 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s
         r_del: The right delimiter.
 
     Returns:
-
+        The tag and the template.
 
     Raises:
         ChevronError: If the tag is unclosed.
@@ -214,17 +212,22 @@ def tokenize(
         def_rdel: The default right delimiter
             ("}}" by default, as in spec compliant mustache)
 
-
-
-
-
-
-
-
-
-
-
-
+    Yields:
+        Mustache tags in the form of a tuple (tag_type, tag_key)
+        where tag_type is one of:
+
+        * literal
+        * section
+        * inverted section
+        * end
+        * partial
+        * no escape
+
+        and tag_key is either the key or in the case of a literal tag,
+        the literal itself.
+
+    Raises:
+        ChevronError: If there is a syntax error in the template.
     """
     global _CURRENT_LINE, _LAST_TAG_LINE
     _CURRENT_LINE = 1
@@ -326,7 +329,7 @@ def tokenize(
 
 
 def _html_escape(string: str) -> str:
-    """HTML
+    """Return the HTML-escaped string with these characters escaped: `" & < >`."""
     html_codes = {
         '"': "&quot;",
         "<": "&lt;",
@@ -349,7 +352,7 @@ def _get_key(
     def_ldel: str,
     def_rdel: str,
 ) -> Any:
-    """
+    """Return a key from the current scope."""
     # If the key is a dot
     if key == ".":
         # Then just return the current scope
@@ -407,7 +410,11 @@ def _get_key(
 
 
 def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str:
-    """Load a partial.
+    """Load a partial.
+
+    Returns:
+        The partial.
+    """
     try:
         # Maybe the partial is in the dictionary
         return partials_dict[name]
@@ -424,13 +431,13 @@ EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({})
 
 
 def render(
-    template:
+    template: str | list[tuple[str, str]] = "",
     data: Mapping[str, Any] = EMPTY_DICT,
     partials_dict: Mapping[str, str] = EMPTY_DICT,
     padding: str = "",
     def_ldel: str = "{{",
     def_rdel: str = "}}",
-    scopes:
+    scopes: Scopes | None = None,
     warn: bool = False,  # noqa: FBT001,FBT002
     keep: bool = False,  # noqa: FBT001,FBT002
 ) -> str: