langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic. Click here for more details.

Files changed (172) hide show
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +45 -70
  4. langchain_core/_api/deprecation.py +80 -80
  5. langchain_core/_api/path.py +22 -8
  6. langchain_core/_import_utils.py +10 -4
  7. langchain_core/agents.py +25 -21
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +341 -348
  11. langchain_core/callbacks/file.py +55 -44
  12. langchain_core/callbacks/manager.py +546 -683
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +35 -36
  15. langchain_core/callbacks/usage.py +65 -70
  16. langchain_core/chat_history.py +48 -55
  17. langchain_core/document_loaders/base.py +46 -21
  18. langchain_core/document_loaders/langsmith.py +39 -36
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +96 -74
  21. langchain_core/documents/compressor.py +12 -9
  22. langchain_core/documents/transformers.py +29 -28
  23. langchain_core/embeddings/fake.py +56 -57
  24. langchain_core/env.py +2 -3
  25. langchain_core/example_selectors/base.py +12 -0
  26. langchain_core/example_selectors/length_based.py +1 -1
  27. langchain_core/example_selectors/semantic_similarity.py +21 -25
  28. langchain_core/exceptions.py +15 -9
  29. langchain_core/globals.py +4 -163
  30. langchain_core/indexing/api.py +132 -125
  31. langchain_core/indexing/base.py +64 -67
  32. langchain_core/indexing/in_memory.py +26 -6
  33. langchain_core/language_models/__init__.py +15 -27
  34. langchain_core/language_models/_utils.py +267 -117
  35. langchain_core/language_models/base.py +92 -177
  36. langchain_core/language_models/chat_models.py +547 -407
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +72 -118
  39. langchain_core/language_models/llms.py +168 -242
  40. langchain_core/load/dump.py +8 -11
  41. langchain_core/load/load.py +32 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +50 -56
  44. langchain_core/messages/__init__.py +36 -51
  45. langchain_core/messages/ai.py +377 -150
  46. langchain_core/messages/base.py +239 -47
  47. langchain_core/messages/block_translators/__init__.py +111 -0
  48. langchain_core/messages/block_translators/anthropic.py +470 -0
  49. langchain_core/messages/block_translators/bedrock.py +94 -0
  50. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  51. langchain_core/messages/block_translators/google_genai.py +530 -0
  52. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  53. langchain_core/messages/block_translators/groq.py +143 -0
  54. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  55. langchain_core/messages/block_translators/openai.py +1010 -0
  56. langchain_core/messages/chat.py +2 -3
  57. langchain_core/messages/content.py +1423 -0
  58. langchain_core/messages/function.py +7 -7
  59. langchain_core/messages/human.py +44 -38
  60. langchain_core/messages/modifier.py +3 -2
  61. langchain_core/messages/system.py +40 -27
  62. langchain_core/messages/tool.py +160 -58
  63. langchain_core/messages/utils.py +527 -638
  64. langchain_core/output_parsers/__init__.py +1 -14
  65. langchain_core/output_parsers/base.py +68 -104
  66. langchain_core/output_parsers/json.py +13 -17
  67. langchain_core/output_parsers/list.py +11 -33
  68. langchain_core/output_parsers/openai_functions.py +56 -74
  69. langchain_core/output_parsers/openai_tools.py +68 -109
  70. langchain_core/output_parsers/pydantic.py +15 -13
  71. langchain_core/output_parsers/string.py +6 -2
  72. langchain_core/output_parsers/transform.py +17 -60
  73. langchain_core/output_parsers/xml.py +34 -44
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +26 -11
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +17 -6
  78. langchain_core/outputs/llm_result.py +15 -8
  79. langchain_core/prompt_values.py +29 -123
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -63
  82. langchain_core/prompts/chat.py +259 -288
  83. langchain_core/prompts/dict.py +19 -11
  84. langchain_core/prompts/few_shot.py +84 -90
  85. langchain_core/prompts/few_shot_with_templates.py +14 -12
  86. langchain_core/prompts/image.py +19 -14
  87. langchain_core/prompts/loading.py +6 -8
  88. langchain_core/prompts/message.py +7 -8
  89. langchain_core/prompts/prompt.py +42 -43
  90. langchain_core/prompts/string.py +37 -16
  91. langchain_core/prompts/structured.py +43 -46
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +52 -192
  94. langchain_core/runnables/base.py +1727 -1683
  95. langchain_core/runnables/branch.py +52 -73
  96. langchain_core/runnables/config.py +89 -103
  97. langchain_core/runnables/configurable.py +128 -130
  98. langchain_core/runnables/fallbacks.py +93 -82
  99. langchain_core/runnables/graph.py +127 -127
  100. langchain_core/runnables/graph_ascii.py +63 -41
  101. langchain_core/runnables/graph_mermaid.py +87 -70
  102. langchain_core/runnables/graph_png.py +31 -36
  103. langchain_core/runnables/history.py +145 -161
  104. langchain_core/runnables/passthrough.py +141 -144
  105. langchain_core/runnables/retry.py +84 -68
  106. langchain_core/runnables/router.py +33 -37
  107. langchain_core/runnables/schema.py +79 -72
  108. langchain_core/runnables/utils.py +95 -139
  109. langchain_core/stores.py +85 -131
  110. langchain_core/structured_query.py +11 -15
  111. langchain_core/sys_info.py +31 -32
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +221 -247
  114. langchain_core/tools/convert.py +144 -161
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -19
  117. langchain_core/tools/simple.py +52 -29
  118. langchain_core/tools/structured.py +56 -60
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/_streaming.py +6 -7
  121. langchain_core/tracers/base.py +103 -112
  122. langchain_core/tracers/context.py +29 -48
  123. langchain_core/tracers/core.py +142 -105
  124. langchain_core/tracers/evaluation.py +30 -34
  125. langchain_core/tracers/event_stream.py +162 -117
  126. langchain_core/tracers/langchain.py +34 -36
  127. langchain_core/tracers/log_stream.py +87 -49
  128. langchain_core/tracers/memory_stream.py +3 -3
  129. langchain_core/tracers/root_listeners.py +18 -34
  130. langchain_core/tracers/run_collector.py +8 -20
  131. langchain_core/tracers/schemas.py +0 -125
  132. langchain_core/tracers/stdout.py +3 -3
  133. langchain_core/utils/__init__.py +1 -4
  134. langchain_core/utils/_merge.py +47 -9
  135. langchain_core/utils/aiter.py +70 -66
  136. langchain_core/utils/env.py +12 -9
  137. langchain_core/utils/function_calling.py +139 -206
  138. langchain_core/utils/html.py +7 -8
  139. langchain_core/utils/input.py +6 -6
  140. langchain_core/utils/interactive_env.py +6 -2
  141. langchain_core/utils/iter.py +48 -45
  142. langchain_core/utils/json.py +14 -4
  143. langchain_core/utils/json_schema.py +159 -43
  144. langchain_core/utils/mustache.py +32 -25
  145. langchain_core/utils/pydantic.py +67 -40
  146. langchain_core/utils/strings.py +5 -5
  147. langchain_core/utils/usage.py +1 -1
  148. langchain_core/utils/utils.py +104 -62
  149. langchain_core/vectorstores/base.py +131 -179
  150. langchain_core/vectorstores/in_memory.py +113 -182
  151. langchain_core/vectorstores/utils.py +23 -17
  152. langchain_core/version.py +1 -1
  153. langchain_core-1.0.0.dist-info/METADATA +68 -0
  154. langchain_core-1.0.0.dist-info/RECORD +172 -0
  155. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  156. langchain_core/beta/__init__.py +0 -1
  157. langchain_core/beta/runnables/__init__.py +0 -1
  158. langchain_core/beta/runnables/context.py +0 -448
  159. langchain_core/memory.py +0 -116
  160. langchain_core/messages/content_blocks.py +0 -1435
  161. langchain_core/prompts/pipeline.py +0 -133
  162. langchain_core/pydantic_v1/__init__.py +0 -30
  163. langchain_core/pydantic_v1/dataclasses.py +0 -23
  164. langchain_core/pydantic_v1/main.py +0 -23
  165. langchain_core/tracers/langchain_v1.py +0 -23
  166. langchain_core/utils/loading.py +0 -31
  167. langchain_core/v1/__init__.py +0 -1
  168. langchain_core/v1/chat_models.py +0 -1047
  169. langchain_core/v1/messages.py +0 -755
  170. langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
  171. langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
  172. langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
@@ -1,8 +1,12 @@
1
1
  """Utilities for working with interactive environments."""
2
2
 
3
+ import sys
4
+
3
5
 
4
6
  def is_interactive_env() -> bool:
5
- """Determine if running within IPython or Jupyter."""
6
- import sys
7
+ """Determine if running within IPython or Jupyter.
7
8
 
9
+ Returns:
10
+ True if running in an interactive environment, `False` otherwise.
11
+ """
8
12
  return hasattr(sys, "ps2")
@@ -8,14 +8,11 @@ from types import TracebackType
8
8
  from typing import (
9
9
  Any,
10
10
  Generic,
11
- Optional,
11
+ Literal,
12
12
  TypeVar,
13
- Union,
14
13
  overload,
15
14
  )
16
15
 
17
- from typing_extensions import Literal
18
-
19
16
  T = TypeVar("T")
20
17
 
21
18
 
@@ -27,11 +24,11 @@ class NoLock:
27
24
 
28
25
  def __exit__(
29
26
  self,
30
- exc_type: Optional[type[BaseException]],
31
- exc_val: Optional[BaseException],
32
- exc_tb: Optional[TracebackType],
27
+ exc_type: type[BaseException] | None,
28
+ exc_val: BaseException | None,
29
+ exc_tb: TracebackType | None,
33
30
  ) -> Literal[False]:
34
- """Exception not handled."""
31
+ """Return False (exception not suppressed)."""
35
32
  return False
36
33
 
37
34
 
@@ -43,10 +40,10 @@ def tee_peer(
43
40
  peers: list[deque[T]],
44
41
  lock: AbstractContextManager[Any],
45
42
  ) -> Generator[T, None, None]:
46
- """An individual iterator of a :py:func:`~.tee`.
43
+ """An individual iterator of a `tee`.
47
44
 
48
45
  This function is a generator that yields items from the shared iterator
49
- ``iterator``. It buffers items until the least advanced iterator has
46
+ `iterator`. It buffers items until the least advanced iterator has
50
47
  yielded them as well. The buffer is shared with all other peers.
51
48
 
52
49
  Args:
@@ -92,39 +89,39 @@ def tee_peer(
92
89
 
93
90
 
94
91
  class Tee(Generic[T]):
95
- """Create ``n`` separate asynchronous iterators over ``iterable``.
92
+ """Create `n` separate asynchronous iterators over `iterable`.
96
93
 
97
- This splits a single ``iterable`` into multiple iterators, each providing
94
+ This splits a single `iterable` into multiple iterators, each providing
98
95
  the same items in the same order.
99
96
  All child iterators may advance separately but share the same items
100
- from ``iterable`` -- when the most advanced iterator retrieves an item,
97
+ from `iterable` -- when the most advanced iterator retrieves an item,
101
98
  it is buffered until the least advanced iterator has yielded it as well.
102
- A ``tee`` works lazily and can handle an infinite ``iterable``, provided
99
+ A `tee` works lazily and can handle an infinite `iterable`, provided
103
100
  that all iterators advance.
104
101
 
105
- .. code-block:: python3
106
-
107
- async def derivative(sensor_data):
108
- previous, current = a.tee(sensor_data, n=2)
109
- await a.anext(previous) # advance one iterator
110
- return a.map(operator.sub, previous, current)
111
-
112
- Unlike :py:func:`itertools.tee`, :py:func:`~.tee` returns a custom type instead
113
- of a :py:class:`tuple`. Like a tuple, it can be indexed, iterated and unpacked
114
- to get the child iterators. In addition, its :py:meth:`~.tee.aclose` method
115
- immediately closes all children, and it can be used in an ``async with`` context
102
+ ```python
103
+ async def derivative(sensor_data):
104
+ previous, current = a.tee(sensor_data, n=2)
105
+ await a.anext(previous) # advance one iterator
106
+ return a.map(operator.sub, previous, current)
107
+ ```
108
+
109
+ Unlike `itertools.tee`, `tee` returns a custom type instead
110
+ of a `tuple`. Like a tuple, it can be indexed, iterated and unpacked
111
+ to get the child iterators. In addition, its `tee.aclose` method
112
+ immediately closes all children, and it can be used in an `async with` context
116
113
  for the same effect.
117
114
 
118
- If ``iterable`` is an iterator and read elsewhere, ``tee`` will *not*
119
- provide these items. Also, ``tee`` must internally buffer each item until the
115
+ If `iterable` is an iterator and read elsewhere, `tee` will *not*
116
+ provide these items. Also, `tee` must internally buffer each item until the
120
117
  last iterator has yielded it; if the most and least advanced iterator differ
121
- by most data, using a :py:class:`list` is more efficient (but not lazy).
118
+ by most data, using a `list` is more efficient (but not lazy).
122
119
 
123
- If the underlying iterable is concurrency safe (``anext`` may be awaited
120
+ If the underlying iterable is concurrency safe (`anext` may be awaited
124
121
  concurrently) the resulting iterators are concurrency safe as well. Otherwise,
125
122
  the iterators are safe if there is only ever one single "most advanced" iterator.
126
- To enforce sequential use of ``anext``, provide a ``lock``
127
- - e.g. an :py:class:`asyncio.Lock` instance in an :py:mod:`asyncio` application -
123
+ To enforce sequential use of `anext`, provide a `lock`
124
+ - e.g. an `asyncio.Lock` instance in an `asyncio` application -
128
125
  and access is automatically synchronised.
129
126
 
130
127
  """
@@ -134,15 +131,15 @@ class Tee(Generic[T]):
134
131
  iterable: Iterator[T],
135
132
  n: int = 2,
136
133
  *,
137
- lock: Optional[AbstractContextManager[Any]] = None,
134
+ lock: AbstractContextManager[Any] | None = None,
138
135
  ):
139
- """Create a ``tee``.
136
+ """Create a `tee`.
140
137
 
141
138
  Args:
142
139
  iterable: The iterable to split.
143
- n: The number of iterators to create. Defaults to 2.
140
+ n: The number of iterators to create.
144
141
  lock: The lock to synchronise access to the shared buffers.
145
- Defaults to None.
142
+
146
143
  """
147
144
  self._iterator = iter(iterable)
148
145
  self._buffers: list[deque[T]] = [deque() for _ in range(n)]
@@ -166,14 +163,16 @@ class Tee(Generic[T]):
166
163
  @overload
167
164
  def __getitem__(self, item: slice) -> tuple[Iterator[T], ...]: ...
168
165
 
169
- def __getitem__(
170
- self, item: Union[int, slice]
171
- ) -> Union[Iterator[T], tuple[Iterator[T], ...]]:
166
+ def __getitem__(self, item: int | slice) -> Iterator[T] | tuple[Iterator[T], ...]:
172
167
  """Return the child iterator(s) at the given index or slice."""
173
168
  return self._children[item]
174
169
 
175
170
  def __iter__(self) -> Iterator[Iterator[T]]:
176
- """Return an iterator over the child iterators."""
171
+ """Return an iterator over the child iterators.
172
+
173
+ Yields:
174
+ The child iterators.
175
+ """
177
176
  yield from self._children
178
177
 
179
178
  def __enter__(self) -> "Tee[T]":
@@ -182,11 +181,15 @@ class Tee(Generic[T]):
182
181
 
183
182
  def __exit__(
184
183
  self,
185
- exc_type: Optional[type[BaseException]],
186
- exc_val: Optional[BaseException],
187
- exc_tb: Optional[TracebackType],
184
+ exc_type: type[BaseException] | None,
185
+ exc_val: BaseException | None,
186
+ exc_tb: TracebackType | None,
188
187
  ) -> Literal[False]:
189
- """Close all child iterators."""
188
+ """Close all child iterators.
189
+
190
+ Returns:
191
+ False (exception not suppressed).
192
+ """
190
193
  self.close()
191
194
  return False
192
195
 
@@ -200,11 +203,11 @@ class Tee(Generic[T]):
200
203
  safetee = Tee
201
204
 
202
205
 
203
- def batch_iterate(size: Optional[int], iterable: Iterable[T]) -> Iterator[list[T]]:
206
+ def batch_iterate(size: int | None, iterable: Iterable[T]) -> Iterator[list[T]]:
204
207
  """Utility batching function.
205
208
 
206
209
  Args:
207
- size: The size of the batch. If None, returns a single batch.
210
+ size: The size of the batch. If `None`, returns a single batch.
208
211
  iterable: The iterable to batch.
209
212
 
210
213
  Yields:
@@ -4,7 +4,8 @@ from __future__ import annotations
4
4
 
5
5
  import json
6
6
  import re
7
- from typing import Any, Callable
7
+ from collections.abc import Callable
8
+ from typing import Any
8
9
 
9
10
  from langchain_core.exceptions import OutputParserException
10
11
 
@@ -19,13 +20,16 @@ def _replace_new_line(match: re.Match[str]) -> str:
19
20
  return match.group(1) + value + match.group(3)
20
21
 
21
22
 
22
- def _custom_parser(multiline_string: str) -> str:
23
+ def _custom_parser(multiline_string: str | bytes | bytearray) -> str:
23
24
  r"""Custom parser for multiline strings.
24
25
 
25
26
  The LLM response for `action_input` may be a multiline
26
27
  string containing unescaped newlines, tabs or quotes. This function
27
28
  replaces those characters with their escaped counterparts.
28
29
  (newlines in JSON must be double-escaped: `\\n`).
30
+
31
+ Returns:
32
+ The modified string with escaped newlines, tabs and quotes.
29
33
  """
30
34
  if isinstance(multiline_string, (bytes, bytearray)):
31
35
  multiline_string = multiline_string.decode()
@@ -47,7 +51,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
47
51
 
48
52
  Args:
49
53
  s: The JSON string to parse.
50
- strict: Whether to use strict parsing. Defaults to False.
54
+ strict: Whether to use strict parsing.
51
55
 
52
56
  Returns:
53
57
  The parsed JSON object as a Python dictionary.
@@ -98,7 +102,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
98
102
  # If we're still inside a string at the end of processing,
99
103
  # we need to close the string.
100
104
  if is_inside_string:
101
- if escaped: # Remoe unterminated escape character
105
+ if escaped: # Remove unterminated escape character
102
106
  new_chars.pop()
103
107
  new_chars.append('"')
104
108
 
@@ -187,6 +191,12 @@ def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict:
187
191
  except json.JSONDecodeError as e:
188
192
  msg = f"Got invalid JSON object. Error: {e}"
189
193
  raise OutputParserException(msg) from e
194
+ if not isinstance(json_obj, dict):
195
+ error_message = (
196
+ f"Expected JSON object (dict), but got: {type(json_obj).__name__}. "
197
+ )
198
+ raise OutputParserException(error_message, llm_output=text)
199
+
190
200
  for key in expected_keys:
191
201
  if key not in json_obj:
192
202
  msg = (
@@ -3,13 +3,13 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from copy import deepcopy
6
- from typing import TYPE_CHECKING, Any, Optional
6
+ from typing import TYPE_CHECKING, Any
7
7
 
8
8
  if TYPE_CHECKING:
9
9
  from collections.abc import Sequence
10
10
 
11
11
 
12
- def _retrieve_ref(path: str, schema: dict) -> dict:
12
+ def _retrieve_ref(path: str, schema: dict) -> list | dict:
13
13
  components = path.split("/")
14
14
  if components[0] != "#":
15
15
  msg = (
@@ -17,9 +17,12 @@ def _retrieve_ref(path: str, schema: dict) -> dict:
17
17
  "with #."
18
18
  )
19
19
  raise ValueError(msg)
20
- out = schema
20
+ out: list | dict = schema
21
21
  for component in components[1:]:
22
22
  if component in out:
23
+ if isinstance(out, list):
24
+ msg = f"Reference '{path}' not found."
25
+ raise KeyError(msg)
23
26
  out = out[component]
24
27
  elif component.isdigit():
25
28
  index = int(component)
@@ -36,58 +39,119 @@ def _retrieve_ref(path: str, schema: dict) -> dict:
36
39
  return deepcopy(out)
37
40
 
38
41
 
42
+ def _process_dict_properties(
43
+ properties: dict[str, Any],
44
+ full_schema: dict[str, Any],
45
+ processed_refs: set[str],
46
+ skip_keys: Sequence[str],
47
+ *,
48
+ shallow_refs: bool,
49
+ ) -> dict[str, Any]:
50
+ """Process dictionary properties, recursing into nested structures."""
51
+ result: dict[str, Any] = {}
52
+ for key, value in properties.items():
53
+ if key in skip_keys:
54
+ # Skip recursion for specified keys, just copy the value as-is
55
+ result[key] = deepcopy(value)
56
+ elif isinstance(value, (dict, list)):
57
+ # Recursively process nested objects and arrays
58
+ result[key] = _dereference_refs_helper(
59
+ value, full_schema, processed_refs, skip_keys, shallow_refs
60
+ )
61
+ else:
62
+ # Copy primitive values directly
63
+ result[key] = value
64
+ return result
65
+
66
+
39
67
  def _dereference_refs_helper(
40
68
  obj: Any,
41
69
  full_schema: dict[str, Any],
42
- processed_refs: Optional[set[str]],
70
+ processed_refs: set[str] | None,
43
71
  skip_keys: Sequence[str],
44
72
  shallow_refs: bool, # noqa: FBT001
45
73
  ) -> Any:
46
- """Inline every pure {'$ref':...}.
74
+ """Dereference JSON Schema $ref objects, handling both pure and mixed references.
47
75
 
48
- But:
49
- - if shallow_refs=True: only break cycles, do not inline nested refs
50
- - if shallow_refs=False: deep-inline all nested refs
76
+ This function processes JSON Schema objects containing $ref properties by resolving
77
+ the references and merging any additional properties. It handles:
51
78
 
52
- Also skip recursion under any key in skip_keys.
79
+ - Pure $ref objects: {"$ref": "#/path/to/definition"}
80
+ - Mixed $ref objects: {"$ref": "#/path", "title": "Custom Title", ...}
81
+ - Circular references by breaking cycles and preserving non-ref properties
82
+
83
+ Args:
84
+ obj: The object to process (can be dict, list, or primitive)
85
+ full_schema: The complete schema containing all definitions
86
+ processed_refs: Set tracking currently processing refs (for cycle detection)
87
+ skip_keys: Keys under which to skip recursion
88
+ shallow_refs: If `True`, only break cycles; if False, deep-inline all refs
89
+
90
+ Returns:
91
+ The object with $ref properties resolved and merged with other properties.
53
92
  """
54
93
  if processed_refs is None:
55
94
  processed_refs = set()
56
95
 
57
- # 1) Pure $ref node?
58
- if isinstance(obj, dict) and "$ref" in set(obj.keys()):
96
+ # Case 1: Object contains a $ref property (pure or mixed with additional properties)
97
+ if isinstance(obj, dict) and "$ref" in obj:
59
98
  ref_path = obj["$ref"]
60
- # cycle?
99
+ additional_properties = {
100
+ key: value for key, value in obj.items() if key != "$ref"
101
+ }
102
+
103
+ # Detect circular reference: if we're already processing this $ref,
104
+ # return only the additional properties to break the cycle
61
105
  if ref_path in processed_refs:
62
- return {}
63
- processed_refs.add(ref_path)
106
+ return _process_dict_properties(
107
+ additional_properties,
108
+ full_schema,
109
+ processed_refs,
110
+ skip_keys,
111
+ shallow_refs=shallow_refs,
112
+ )
64
113
 
65
- # grab + copy the target
66
- target = deepcopy(_retrieve_ref(ref_path, full_schema))
114
+ # Mark this reference as being processed (for cycle detection)
115
+ processed_refs.add(ref_path)
67
116
 
68
- # deep inlining: recurse into everything
69
- result = _dereference_refs_helper(
70
- target, full_schema, processed_refs, skip_keys, shallow_refs
117
+ # Fetch and recursively resolve the referenced object
118
+ referenced_object = deepcopy(_retrieve_ref(ref_path, full_schema))
119
+ resolved_reference = _dereference_refs_helper(
120
+ referenced_object, full_schema, processed_refs, skip_keys, shallow_refs
71
121
  )
72
122
 
123
+ # Clean up: remove from processing set before returning
73
124
  processed_refs.remove(ref_path)
74
- return result
75
125
 
76
- # 2) Not a pure-$ref: recurse, skipping any keys in skip_keys
126
+ # Pure $ref case: no additional properties, return resolved reference directly
127
+ if not additional_properties:
128
+ return resolved_reference
129
+
130
+ # Mixed $ref case: merge resolved reference with additional properties
131
+ # Additional properties take precedence over resolved properties
132
+ merged_result = {}
133
+ if isinstance(resolved_reference, dict):
134
+ merged_result.update(resolved_reference)
135
+
136
+ # Process additional properties and merge them (they override resolved ones)
137
+ processed_additional = _process_dict_properties(
138
+ additional_properties,
139
+ full_schema,
140
+ processed_refs,
141
+ skip_keys,
142
+ shallow_refs=shallow_refs,
143
+ )
144
+ merged_result.update(processed_additional)
145
+
146
+ return merged_result
147
+
148
+ # Case 2: Regular dictionary without $ref - process all properties
77
149
  if isinstance(obj, dict):
78
- out: dict[str, Any] = {}
79
- for k, v in obj.items():
80
- if k in skip_keys:
81
- # do not recurse under this key
82
- out[k] = deepcopy(v)
83
- elif isinstance(v, (dict, list)):
84
- out[k] = _dereference_refs_helper(
85
- v, full_schema, processed_refs, skip_keys, shallow_refs
86
- )
87
- else:
88
- out[k] = v
89
- return out
150
+ return _process_dict_properties(
151
+ obj, full_schema, processed_refs, skip_keys, shallow_refs=shallow_refs
152
+ )
90
153
 
154
+ # Case 3: List - recursively process each item
91
155
  if isinstance(obj, list):
92
156
  return [
93
157
  _dereference_refs_helper(
@@ -96,25 +160,77 @@ def _dereference_refs_helper(
96
160
  for item in obj
97
161
  ]
98
162
 
163
+ # Case 4: Primitive value (string, number, boolean, null) - return unchanged
99
164
  return obj
100
165
 
101
166
 
102
167
  def dereference_refs(
103
168
  schema_obj: dict,
104
169
  *,
105
- full_schema: Optional[dict] = None,
106
- skip_keys: Optional[Sequence[str]] = None,
170
+ full_schema: dict | None = None,
171
+ skip_keys: Sequence[str] | None = None,
107
172
  ) -> dict:
108
- """Try to substitute $refs in JSON Schema.
173
+ """Resolve and inline JSON Schema $ref references in a schema object.
174
+
175
+ This function processes a JSON Schema and resolves all $ref references by replacing
176
+ them with the actual referenced content. It handles both simple references and
177
+ complex cases like circular references and mixed $ref objects that contain
178
+ additional properties alongside the $ref.
109
179
 
110
180
  Args:
111
- schema_obj: The fragment to dereference.
112
- full_schema: The complete schema (defaults to schema_obj).
113
- skip_keys:
114
- - If None (the default), we skip recursion under '$defs' *and* only
115
- shallow-inline refs.
116
- - If provided (even as an empty list), we will recurse under every key and
117
- deep-inline all refs.
181
+ schema_obj: The JSON Schema object or fragment to process. This can be a
182
+ complete schema or just a portion of one.
183
+ full_schema: The complete schema containing all definitions that $refs might
184
+ point to. If not provided, defaults to schema_obj (useful when the
185
+ schema is self-contained).
186
+ skip_keys: Controls recursion behavior and reference resolution depth:
187
+ - If `None` (default): Only recurse under '$defs' and use shallow reference
188
+ resolution (break cycles but don't deep-inline nested refs)
189
+ - If provided (even as []): Recurse under all keys and use deep reference
190
+ resolution (fully inline all nested references)
191
+
192
+ Returns:
193
+ A new dictionary with all $ref references resolved and inlined. The original
194
+ schema_obj is not modified.
195
+
196
+ Examples:
197
+ Basic reference resolution:
198
+ >>> schema = {
199
+ ... "type": "object",
200
+ ... "properties": {"name": {"$ref": "#/$defs/string_type"}},
201
+ ... "$defs": {"string_type": {"type": "string"}},
202
+ ... }
203
+ >>> result = dereference_refs(schema)
204
+ >>> result["properties"]["name"] # {"type": "string"}
205
+
206
+ Mixed $ref with additional properties:
207
+ >>> schema = {
208
+ ... "properties": {
209
+ ... "name": {"$ref": "#/$defs/base", "description": "User name"}
210
+ ... },
211
+ ... "$defs": {"base": {"type": "string", "minLength": 1}},
212
+ ... }
213
+ >>> result = dereference_refs(schema)
214
+ >>> result["properties"]["name"]
215
+ # {"type": "string", "minLength": 1, "description": "User name"}
216
+
217
+ Handling circular references:
218
+ >>> schema = {
219
+ ... "properties": {"user": {"$ref": "#/$defs/User"}},
220
+ ... "$defs": {
221
+ ... "User": {
222
+ ... "type": "object",
223
+ ... "properties": {"friend": {"$ref": "#/$defs/User"}},
224
+ ... }
225
+ ... },
226
+ ... }
227
+ >>> result = dereference_refs(schema) # Won't cause infinite recursion
228
+
229
+ Note:
230
+ - Circular references are handled gracefully by breaking cycles
231
+ - Mixed $ref objects (with both $ref and other properties) are supported
232
+ - Additional properties in mixed $refs override resolved properties
233
+ - The $defs section is preserved in the output by default
118
234
  """
119
235
  full = full_schema or schema_obj
120
236
  keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"]
@@ -12,18 +12,16 @@ from typing import (
12
12
  TYPE_CHECKING,
13
13
  Any,
14
14
  Literal,
15
- Optional,
16
- Union,
17
15
  cast,
18
16
  )
19
17
 
20
18
  if TYPE_CHECKING:
21
- from typing_extensions import TypeAlias
19
+ from typing import TypeAlias
22
20
 
23
21
  logger = logging.getLogger(__name__)
24
22
 
25
23
 
26
- Scopes: TypeAlias = list[Union[Literal[False, 0], Mapping[str, Any]]]
24
+ Scopes: TypeAlias = list[Literal[False, 0] | Mapping[str, Any]]
27
25
 
28
26
 
29
27
  # Globals
@@ -48,7 +46,7 @@ def grab_literal(template: str, l_del: str) -> tuple[str, str]:
48
46
  l_del: The left delimiter.
49
47
 
50
48
  Returns:
51
- tuple[str, str]: The literal and the template.
49
+ The literal and the template.
52
50
  """
53
51
  global _CURRENT_LINE
54
52
 
@@ -78,11 +76,11 @@ def l_sa_check(
78
76
  is_standalone: Whether the tag is standalone.
79
77
 
80
78
  Returns:
81
- bool: Whether the tag could be a standalone.
79
+ Whether the tag could be a standalone.
82
80
  """
83
81
  # If there is a newline, or the previous tag was a standalone
84
82
  if literal.find("\n") != -1 or is_standalone:
85
- padding = literal.split("\n")[-1]
83
+ padding = literal.rsplit("\n", maxsplit=1)[-1]
86
84
 
87
85
  # If all the characters since the last newline are spaces
88
86
  # Then the next tag could be a standalone
@@ -104,7 +102,7 @@ def r_sa_check(
104
102
  is_standalone: Whether the tag is standalone.
105
103
 
106
104
  Returns:
107
- bool: Whether the tag could be a standalone.
105
+ Whether the tag could be a standalone.
108
106
  """
109
107
  # Check right side if we might be a standalone
110
108
  if is_standalone and tag_type not in {"variable", "no escape"}:
@@ -126,7 +124,7 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s
126
124
  r_del: The right delimiter.
127
125
 
128
126
  Returns:
129
- tuple[tuple[str, str], str]: The tag and the template.
127
+ The tag and the template.
130
128
 
131
129
  Raises:
132
130
  ChevronError: If the tag is unclosed.
@@ -214,17 +212,22 @@ def tokenize(
214
212
  def_rdel: The default right delimiter
215
213
  ("}}" by default, as in spec compliant mustache)
216
214
 
217
- Returns:
218
- A generator of mustache tags in the form of a tuple (tag_type, tag_key)
219
- Where tag_type is one of:
220
- * literal
221
- * section
222
- * inverted section
223
- * end
224
- * partial
225
- * no escape
226
- And tag_key is either the key or in the case of a literal tag,
227
- the literal itself.
215
+ Yields:
216
+ Mustache tags in the form of a tuple (tag_type, tag_key)
217
+ where tag_type is one of:
218
+
219
+ * literal
220
+ * section
221
+ * inverted section
222
+ * end
223
+ * partial
224
+ * no escape
225
+
226
+ and tag_key is either the key or in the case of a literal tag,
227
+ the literal itself.
228
+
229
+ Raises:
230
+ ChevronError: If there is a syntax error in the template.
228
231
  """
229
232
  global _CURRENT_LINE, _LAST_TAG_LINE
230
233
  _CURRENT_LINE = 1
@@ -326,7 +329,7 @@ def tokenize(
326
329
 
327
330
 
328
331
  def _html_escape(string: str) -> str:
329
- """HTML escape all of these " & < >."""
332
+ """Return the HTML-escaped string with these characters escaped: `" & < >`."""
330
333
  html_codes = {
331
334
  '"': "&quot;",
332
335
  "<": "&lt;",
@@ -349,7 +352,7 @@ def _get_key(
349
352
  def_ldel: str,
350
353
  def_rdel: str,
351
354
  ) -> Any:
352
- """Get a key from the current scope."""
355
+ """Return a key from the current scope."""
353
356
  # If the key is a dot
354
357
  if key == ".":
355
358
  # Then just return the current scope
@@ -407,7 +410,11 @@ def _get_key(
407
410
 
408
411
 
409
412
  def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str:
410
- """Load a partial."""
413
+ """Load a partial.
414
+
415
+ Returns:
416
+ The partial.
417
+ """
411
418
  try:
412
419
  # Maybe the partial is in the dictionary
413
420
  return partials_dict[name]
@@ -424,13 +431,13 @@ EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({})
424
431
 
425
432
 
426
433
  def render(
427
- template: Union[str, list[tuple[str, str]]] = "",
434
+ template: str | list[tuple[str, str]] = "",
428
435
  data: Mapping[str, Any] = EMPTY_DICT,
429
436
  partials_dict: Mapping[str, str] = EMPTY_DICT,
430
437
  padding: str = "",
431
438
  def_ldel: str = "{{",
432
439
  def_rdel: str = "}}",
433
- scopes: Optional[Scopes] = None,
440
+ scopes: Scopes | None = None,
434
441
  warn: bool = False, # noqa: FBT001,FBT002
435
442
  keep: bool = False, # noqa: FBT001,FBT002
436
443
  ) -> str: