langchain-core 1.0.0rc1__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl

This diff shows the changes between two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.


Files changed (92)
  1. langchain_core/agents.py +3 -3
  2. langchain_core/caches.py +44 -48
  3. langchain_core/callbacks/base.py +5 -5
  4. langchain_core/callbacks/file.py +2 -2
  5. langchain_core/callbacks/stdout.py +1 -1
  6. langchain_core/chat_history.py +1 -1
  7. langchain_core/document_loaders/base.py +21 -21
  8. langchain_core/document_loaders/langsmith.py +2 -2
  9. langchain_core/documents/base.py +39 -39
  10. langchain_core/embeddings/fake.py +4 -2
  11. langchain_core/example_selectors/semantic_similarity.py +4 -6
  12. langchain_core/exceptions.py +3 -4
  13. langchain_core/indexing/api.py +8 -14
  14. langchain_core/language_models/__init__.py +11 -25
  15. langchain_core/language_models/_utils.py +2 -1
  16. langchain_core/language_models/base.py +7 -0
  17. langchain_core/language_models/chat_models.py +14 -16
  18. langchain_core/language_models/fake_chat_models.py +3 -3
  19. langchain_core/language_models/llms.py +4 -4
  20. langchain_core/load/dump.py +3 -4
  21. langchain_core/load/load.py +0 -9
  22. langchain_core/load/serializable.py +3 -3
  23. langchain_core/messages/ai.py +20 -22
  24. langchain_core/messages/base.py +8 -8
  25. langchain_core/messages/block_translators/__init__.py +1 -1
  26. langchain_core/messages/block_translators/anthropic.py +1 -1
  27. langchain_core/messages/block_translators/bedrock_converse.py +1 -1
  28. langchain_core/messages/block_translators/google_genai.py +3 -2
  29. langchain_core/messages/block_translators/google_vertexai.py +4 -32
  30. langchain_core/messages/block_translators/langchain_v0.py +1 -1
  31. langchain_core/messages/block_translators/openai.py +1 -1
  32. langchain_core/messages/chat.py +2 -6
  33. langchain_core/messages/content.py +34 -17
  34. langchain_core/messages/function.py +3 -7
  35. langchain_core/messages/human.py +4 -9
  36. langchain_core/messages/modifier.py +1 -1
  37. langchain_core/messages/system.py +2 -10
  38. langchain_core/messages/tool.py +30 -42
  39. langchain_core/messages/utils.py +24 -30
  40. langchain_core/output_parsers/base.py +24 -24
  41. langchain_core/output_parsers/json.py +0 -1
  42. langchain_core/output_parsers/list.py +1 -1
  43. langchain_core/output_parsers/openai_functions.py +2 -2
  44. langchain_core/output_parsers/openai_tools.py +4 -9
  45. langchain_core/output_parsers/string.py +1 -1
  46. langchain_core/outputs/generation.py +1 -1
  47. langchain_core/prompt_values.py +7 -7
  48. langchain_core/prompts/base.py +1 -1
  49. langchain_core/prompts/chat.py +12 -13
  50. langchain_core/prompts/dict.py +2 -2
  51. langchain_core/prompts/few_shot_with_templates.py +1 -1
  52. langchain_core/prompts/image.py +1 -1
  53. langchain_core/prompts/message.py +2 -2
  54. langchain_core/prompts/prompt.py +7 -8
  55. langchain_core/prompts/string.py +1 -1
  56. langchain_core/prompts/structured.py +2 -2
  57. langchain_core/rate_limiters.py +23 -29
  58. langchain_core/retrievers.py +29 -29
  59. langchain_core/runnables/base.py +9 -16
  60. langchain_core/runnables/branch.py +1 -1
  61. langchain_core/runnables/config.py +1 -1
  62. langchain_core/runnables/configurable.py +2 -2
  63. langchain_core/runnables/fallbacks.py +1 -1
  64. langchain_core/runnables/graph.py +23 -28
  65. langchain_core/runnables/graph_mermaid.py +9 -9
  66. langchain_core/runnables/graph_png.py +1 -1
  67. langchain_core/runnables/history.py +2 -2
  68. langchain_core/runnables/passthrough.py +3 -3
  69. langchain_core/runnables/router.py +1 -1
  70. langchain_core/runnables/utils.py +5 -5
  71. langchain_core/tools/base.py +9 -10
  72. langchain_core/tools/convert.py +13 -17
  73. langchain_core/tools/retriever.py +6 -6
  74. langchain_core/tools/simple.py +1 -1
  75. langchain_core/tools/structured.py +5 -10
  76. langchain_core/tracers/memory_stream.py +1 -1
  77. langchain_core/tracers/root_listeners.py +2 -2
  78. langchain_core/tracers/stdout.py +1 -2
  79. langchain_core/utils/__init__.py +1 -1
  80. langchain_core/utils/aiter.py +1 -1
  81. langchain_core/utils/function_calling.py +15 -38
  82. langchain_core/utils/input.py +1 -1
  83. langchain_core/utils/iter.py +1 -1
  84. langchain_core/utils/json.py +1 -1
  85. langchain_core/utils/strings.py +1 -1
  86. langchain_core/vectorstores/base.py +14 -25
  87. langchain_core/vectorstores/utils.py +2 -2
  88. langchain_core/version.py +1 -1
  89. {langchain_core-1.0.0rc1.dist-info → langchain_core-1.0.0rc2.dist-info}/METADATA +1 -1
  90. langchain_core-1.0.0rc2.dist-info/RECORD +172 -0
  91. langchain_core-1.0.0rc1.dist-info/RECORD +0 -172
  92. {langchain_core-1.0.0rc1.dist-info → langchain_core-1.0.0rc2.dist-info}/WHEEL +0 -0
langchain_core/agents.py CHANGED
@@ -84,7 +84,7 @@ class AgentAction(Serializable):
 
     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the langchain object.
+        """Get the namespace of the LangChain object.
 
         Returns:
            `["langchain", "schema", "agent"]`
@@ -112,7 +112,7 @@ class AgentActionMessageLog(AgentAction):
    if (tool, tool_input) cannot be used to fully recreate the LLM
    prediction, and you need that LLM prediction (for future agent iteration).
    Compared to `log`, this is useful when the underlying LLM is a
-    ChatModel (and therefore returns messages rather than a string)."""
+    chat model (and therefore returns messages rather than a string)."""
    # Ignoring type because we're overriding the type from AgentAction.
    # And this is the correct thing to do in this case.
    # The type literal is used for serialization purposes.
@@ -161,7 +161,7 @@ class AgentFinish(Serializable):
 
     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the langchain object.
+        """Get the namespace of the LangChain object.
 
         Returns:
            `["langchain", "schema", "agent"]`
langchain_core/caches.py CHANGED
@@ -1,18 +1,15 @@
-"""Cache classes.
+"""`caches` provides an optional caching layer for language models.
 
 !!! warning
-    Beta Feature!
+    This is a beta feature! Please be wary of deploying experimental code to production
+    unless you've taken appropriate precautions.
 
-**Cache** provides an optional caching layer for LLMs.
+A cache is useful for two reasons:
 
-Cache is useful for two reasons:
-
-- It can save you money by reducing the number of API calls you make to the LLM
+1. It can save you money by reducing the number of API calls you make to the LLM
     provider if you're often requesting the same completion multiple times.
-- It can speed up your application by reducing the number of API calls you make
-    to the LLM provider.
-
-Cache directly competes with Memory. See documentation for Pros and Cons.
+2. It can speed up your application by reducing the number of API calls you make to the
+    LLM provider.
 """
 
 from __future__ import annotations
@@ -34,8 +31,8 @@ class BaseCache(ABC):
 
     The cache interface consists of the following methods:
 
-    - lookup: Look up a value based on a prompt and llm_string.
-    - update: Update the cache based on a prompt and llm_string.
+    - lookup: Look up a value based on a prompt and `llm_string`.
+    - update: Update the cache based on a prompt and `llm_string`.
     - clear: Clear the cache.
 
     In addition, the cache interface provides an async version of each method.
@@ -47,14 +44,14 @@ class BaseCache(ABC):
 
     @abstractmethod
     def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Look up based on prompt and llm_string.
+        """Look up based on `prompt` and `llm_string`.
 
         A cache implementation is expected to generate a key from the 2-tuple
         of prompt and llm_string (e.g., by concatenating them with a delimiter).
 
         Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
                 This is used to capture the invocation parameters of the LLM
@@ -63,27 +60,27 @@ class BaseCache(ABC):
                 representation.
 
         Returns:
-            On a cache miss, return None. On a cache hit, return the cached value.
-            The cached value is a list of Generations (or subclasses).
+            On a cache miss, return `None`. On a cache hit, return the cached value.
+            The cached value is a list of `Generation` (or subclasses).
         """
 
     @abstractmethod
     def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
-        """Update cache based on prompt and llm_string.
+        """Update cache based on `prompt` and `llm_string`.
 
         The prompt and llm_string are used to generate a key for the cache.
         The key should match that of the lookup method.
 
         Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
                 This is used to capture the invocation parameters of the LLM
                 (e.g., model name, temperature, stop tokens, max tokens, etc.).
                 These invocation parameters are serialized into a string
                 representation.
-            return_val: The value to be cached. The value is a list of Generations
+            return_val: The value to be cached. The value is a list of `Generation`
                 (or subclasses).
         """
 
@@ -92,14 +89,14 @@ class BaseCache(ABC):
        """Clear cache that can take additional keyword arguments."""
 
    async def alookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Async look up based on prompt and llm_string.
+        """Async look up based on `prompt` and `llm_string`.
 
        A cache implementation is expected to generate a key from the 2-tuple
        of prompt and llm_string (e.g., by concatenating them with a delimiter).
 
        Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
                This is used to capture the invocation parameters of the LLM
@@ -108,29 +105,29 @@ class BaseCache(ABC):
                representation.
 
        Returns:
-            On a cache miss, return None. On a cache hit, return the cached value.
-            The cached value is a list of Generations (or subclasses).
+            On a cache miss, return `None`. On a cache hit, return the cached value.
+            The cached value is a list of `Generation` (or subclasses).
        """
        return await run_in_executor(None, self.lookup, prompt, llm_string)
 
    async def aupdate(
        self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
    ) -> None:
-        """Async update cache based on prompt and llm_string.
+        """Async update cache based on `prompt` and `llm_string`.
 
        The prompt and llm_string are used to generate a key for the cache.
        The key should match that of the look up method.
 
        Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).
                These invocation parameters are serialized into a string
                representation.
-            return_val: The value to be cached. The value is a list of Generations
+            return_val: The value to be cached. The value is a list of `Generation`
                (or subclasses).
        """
        return await run_in_executor(None, self.update, prompt, llm_string, return_val)
@@ -150,10 +147,9 @@ class InMemoryCache(BaseCache):
            maxsize: The maximum number of items to store in the cache.
                If `None`, the cache has no maximum size.
                If the cache exceeds the maximum size, the oldest items are removed.
-                Default is None.
 
        Raises:
-            ValueError: If maxsize is less than or equal to 0.
+            ValueError: If `maxsize` is less than or equal to `0`.
        """
        self._cache: dict[tuple[str, str], RETURN_VAL_TYPE] = {}
        if maxsize is not None and maxsize <= 0:
@@ -162,28 +158,28 @@ class InMemoryCache(BaseCache):
        self._maxsize = maxsize
 
    def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Look up based on prompt and llm_string.
+        """Look up based on `prompt` and `llm_string`.
 
        Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
 
        Returns:
-            On a cache miss, return None. On a cache hit, return the cached value.
+            On a cache miss, return `None`. On a cache hit, return the cached value.
        """
        return self._cache.get((prompt, llm_string), None)
 
    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
-        """Update cache based on prompt and llm_string.
+        """Update cache based on `prompt` and `llm_string`.
 
        Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
-            return_val: The value to be cached. The value is a list of Generations
+            return_val: The value to be cached. The value is a list of `Generation`
                (or subclasses).
        """
        if self._maxsize is not None and len(self._cache) == self._maxsize:
@@ -196,30 +192,30 @@ class InMemoryCache(BaseCache):
        self._cache = {}
 
    async def alookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Async look up based on prompt and llm_string.
+        """Async look up based on `prompt` and `llm_string`.
 
        Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
 
        Returns:
-            On a cache miss, return None. On a cache hit, return the cached value.
+            On a cache miss, return `None`. On a cache hit, return the cached value.
        """
        return self.lookup(prompt, llm_string)
 
    async def aupdate(
        self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
    ) -> None:
-        """Async update cache based on prompt and llm_string.
+        """Async update cache based on `prompt` and `llm_string`.
 
        Args:
-            prompt: a string representation of the prompt.
-                In the case of a Chat model, the prompt is a non-trivial
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
-            return_val: The value to be cached. The value is a list of Generations
+            return_val: The value to be cached. The value is a list of `Generation`
                (or subclasses).
        """
        self.update(prompt, llm_string, return_val)
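The docstrings above fully describe the cache contract: the `(prompt, llm_string)` pair acts as the key, `lookup` returns `None` on a miss, and `update` stores a list of `Generation` objects. A minimal sketch of that contract using the in-memory implementation (the prompt and config strings below are invented for illustration):

```python
from langchain_core.caches import InMemoryCache
from langchain_core.outputs import Generation

cache = InMemoryCache(maxsize=2)  # oldest entry is evicted once 2 items are stored

# The (prompt, llm_string) pair acts as the cache key.
cache.update("prompt-a", "model=x,temperature=0", [Generation(text="cached answer")])

cache.lookup("prompt-a", "model=x,temperature=0")  # -> [Generation(text="cached answer")]
cache.lookup("prompt-b", "model=x,temperature=0")  # -> None (cache miss)
```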
langchain_core/callbacks/base.py CHANGED
@@ -1001,7 +1001,7 @@ class BaseCallbackManager(CallbackManagerMixin):
 
        Args:
            handler: The handler to add.
-            inherit: Whether to inherit the handler. Default is True.
+            inherit: Whether to inherit the handler.
        """
        if handler not in self.handlers:
            self.handlers.append(handler)
@@ -1028,7 +1028,7 @@ class BaseCallbackManager(CallbackManagerMixin):
 
        Args:
            handlers: The handlers to set.
-            inherit: Whether to inherit the handlers. Default is True.
+            inherit: Whether to inherit the handlers.
        """
        self.handlers = []
        self.inheritable_handlers = []
@@ -1044,7 +1044,7 @@ class BaseCallbackManager(CallbackManagerMixin):
 
        Args:
            handler: The handler to set.
-            inherit: Whether to inherit the handler. Default is True.
+            inherit: Whether to inherit the handler.
        """
        self.set_handlers([handler], inherit=inherit)
 
@@ -1057,7 +1057,7 @@ class BaseCallbackManager(CallbackManagerMixin):
 
        Args:
            tags: The tags to add.
-            inherit: Whether to inherit the tags. Default is True.
+            inherit: Whether to inherit the tags.
        """
        for tag in tags:
            if tag in self.tags:
@@ -1087,7 +1087,7 @@ class BaseCallbackManager(CallbackManagerMixin):
 
        Args:
            metadata: The metadata to add.
-            inherit: Whether to inherit the metadata. Default is True.
+            inherit: Whether to inherit the metadata.
        """
        self.metadata.update(metadata)
        if inherit:
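All five hunks drop the redundant "Default is True" phrasing for the `inherit` flag. As a short sketch of how that flag behaves on a concrete manager (the tag and metadata values below are invented):

```python
from langchain_core.callbacks import CallbackManager, StdOutCallbackHandler

manager = CallbackManager(handlers=[])

# inherit=True (the default) also records the handler/tags for child run managers.
manager.add_handler(StdOutCallbackHandler(), inherit=True)
manager.add_tags(["experiment-1"])

# inherit=False keeps the metadata local to this manager only.
manager.add_metadata({"run": "demo"}, inherit=False)
```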
langchain_core/callbacks/file.py CHANGED
@@ -132,7 +132,7 @@ class FileCallbackHandler(BaseCallbackHandler):
        Args:
            text: The text to write to the file.
            color: Optional color for the text. Defaults to `self.color`.
-            end: String appended after the text. Defaults to `""`.
+            end: String appended after the text.
            file: Optional file to write to. Defaults to `self.file`.
 
        Raises:
@@ -239,7 +239,7 @@ class FileCallbackHandler(BaseCallbackHandler):
            text: The text to write.
            color: Color override for this specific output. If `None`, uses
                `self.color`.
-            end: String appended after the text. Defaults to `""`.
+            end: String appended after the text.
            **kwargs: Additional keyword arguments.
 
        """
langchain_core/callbacks/stdout.py CHANGED
@@ -104,7 +104,7 @@ class StdOutCallbackHandler(BaseCallbackHandler):
        Args:
            text: The text to print.
            color: The color to use for the text.
-            end: The end character to use. Defaults to "".
+            end: The end character to use.
            **kwargs: Additional keyword arguments.
        """
        print_text(text, color=color or self.color, end=end)
langchain_core/chat_history.py CHANGED
@@ -153,7 +153,7 @@ class BaseChatMessageHistory(ABC):
 
        Raises:
            NotImplementedError: If the sub-class has not implemented an efficient
-                add_messages method.
+                `add_messages` method.
        """
        if type(self).add_messages != BaseChatMessageHistory.add_messages:
            # This means that the sub-class has implemented an efficient add_messages
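The hunk above documents the fallback between `add_message` and a bulk `add_messages` override. A small usage sketch with the built-in in-memory history:

```python
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import AIMessage, HumanMessage

history = InMemoryChatMessageHistory()

# add_messages works message-by-message unless a subclass provides
# a more efficient bulk implementation.
history.add_messages([HumanMessage("hi"), AIMessage("hello!")])
print(history.messages)
```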
langchain_core/document_loaders/base.py CHANGED
@@ -35,38 +35,38 @@ class BaseLoader(ABC):  # noqa: B024
    # Sub-classes should not implement this method directly. Instead, they
    # should implement the lazy load method.
    def load(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.
 
        Returns:
-            the documents.
+            The documents.
        """
        return list(self.lazy_load())
 
    async def aload(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.
 
        Returns:
-            the documents.
+            The documents.
        """
        return [document async for document in self.alazy_load()]
 
    def load_and_split(
        self, text_splitter: TextSplitter | None = None
    ) -> list[Document]:
-        """Load Documents and split into chunks. Chunks are returned as Documents.
+        """Load Documents and split into chunks. Chunks are returned as `Document`.
 
        Do not override this method. It should be considered to be deprecated!
 
        Args:
-            text_splitter: TextSplitter instance to use for splitting documents.
-                Defaults to RecursiveCharacterTextSplitter.
+            text_splitter: `TextSplitter` instance to use for splitting documents.
+                Defaults to `RecursiveCharacterTextSplitter`.
 
        Raises:
-            ImportError: If langchain-text-splitters is not installed
-                and no text_splitter is provided.
+            ImportError: If `langchain-text-splitters` is not installed
+                and no `text_splitter` is provided.
 
        Returns:
-            List of Documents.
+            List of `Document`.
        """
        if text_splitter is None:
            if not _HAS_TEXT_SPLITTERS:
@@ -86,10 +86,10 @@ class BaseLoader(ABC):  # noqa: B024
    # Attention: This method will be upgraded into an abstractmethod once it's
    # implemented in all the existing subclasses.
    def lazy_load(self) -> Iterator[Document]:
-        """A lazy loader for Documents.
+        """A lazy loader for `Document`.
 
        Yields:
-            the documents.
+            The `Document` objects.
        """
        if type(self).load != BaseLoader.load:
            return iter(self.load())
@@ -97,10 +97,10 @@ class BaseLoader(ABC):  # noqa: B024
        raise NotImplementedError(msg)
 
    async def alazy_load(self) -> AsyncIterator[Document]:
-        """A lazy loader for Documents.
+        """A lazy loader for `Document`.
 
        Yields:
-            the documents.
+            The `Document` objects.
        """
        iterator = await run_in_executor(None, self.lazy_load)
        done = object()
@@ -115,7 +115,7 @@ class BaseBlobParser(ABC):
    """Abstract interface for blob parsers.
 
    A blob parser provides a way to parse raw data stored in a blob into one
-    or more documents.
+    or more `Document` objects.
 
    The parser can be composed with blob loaders, making it easy to reuse
    a parser independent of how the blob was originally loaded.
@@ -128,25 +128,25 @@ class BaseBlobParser(ABC):
        Subclasses are required to implement this method.
 
        Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
        Returns:
-            Generator of documents
+            Generator of `Document` objects
        """
 
    def parse(self, blob: Blob) -> list[Document]:
-        """Eagerly parse the blob into a document or documents.
+        """Eagerly parse the blob into a `Document` or `Document` objects.
 
        This is a convenience method for interactive development environment.
 
-        Production applications should favor the lazy_parse method instead.
+        Production applications should favor the `lazy_parse` method instead.
 
        Subclasses should generally not over-ride this parse method.
 
        Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
        Returns:
-            List of documents
+            List of `Document` objects
        """
        return list(self.lazy_parse(blob))
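Since the hunks above spell out the `BaseLoader` contract (implement `lazy_load`; `load`/`aload` are inherited and simply materialize it), here is a minimal sketch of a custom loader; `LineLoader` and its file path are invented for illustration:

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class LineLoader(BaseLoader):
    """Hypothetical loader yielding one Document per line of a text file."""

    def __init__(self, path: str) -> None:
        self.path = path

    def lazy_load(self) -> Iterator[Document]:
        with open(self.path, encoding="utf-8") as f:
            for i, line in enumerate(f):
                yield Document(
                    page_content=line.rstrip("\n"),
                    metadata={"source": self.path, "line": i},
                )


# load() is inherited and simply materializes lazy_load():
# docs = LineLoader("notes.txt").load()
```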
langchain_core/document_loaders/langsmith.py CHANGED
@@ -76,8 +76,8 @@ class LangSmithLoader(BaseLoader):
            splits: A list of dataset splits, which are
                divisions of your dataset such as 'train', 'test', or 'validation'.
                Returns examples only from the specified splits.
-            inline_s3_urls: Whether to inline S3 URLs. Defaults to `True`.
-            offset: The offset to start from. Defaults to 0.
+            inline_s3_urls: Whether to inline S3 URLs.
+            offset: The offset to start from.
            limit: The maximum number of examples to return.
            metadata: Metadata to filter by.
            filter: A structured filter string to apply to the examples.
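The parameters above map onto the loader's constructor arguments. A hedged sketch of typical usage (the dataset name is invented, and LangSmith credentials such as `LANGSMITH_API_KEY` are assumed to be configured in the environment):

```python
from langchain_core.document_loaders import LangSmithLoader

# Hypothetical dataset; requires LangSmith credentials in the environment.
loader = LangSmithLoader(dataset_name="my-dataset", offset=0, limit=10)

for doc in loader.lazy_load():
    print(doc.page_content)
```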
langchain_core/documents/base.py CHANGED
@@ -57,51 +57,51 @@ class Blob(BaseMedia):
 
    Example: Initialize a blob from in-memory data
 
-    ```python
-    from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-    blob = Blob.from_data("Hello, world!")
+        blob = Blob.from_data("Hello, world!")
 
-    # Read the blob as a string
-    print(blob.as_string())
+        # Read the blob as a string
+        print(blob.as_string())
 
-    # Read the blob as bytes
-    print(blob.as_bytes())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-    # Read the blob as a byte stream
-    with blob.as_bytes_io() as f:
-        print(f.read())
-    ```
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
 
    Example: Load from memory and specify mime-type and metadata
 
-    ```python
-    from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-    blob = Blob.from_data(
-        data="Hello, world!",
-        mime_type="text/plain",
-        metadata={"source": "https://example.com"},
-    )
-    ```
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```
 
    Example: Load the blob from a file
 
-    ```python
-    from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-    blob = Blob.from_path("path/to/file.txt")
+        blob = Blob.from_path("path/to/file.txt")
 
-    # Read the blob as a string
-    print(blob.as_string())
+        # Read the blob as a string
+        print(blob.as_string())
 
-    # Read the blob as bytes
-    print(blob.as_bytes())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-    # Read the blob as a byte stream
-    with blob.as_bytes_io() as f:
-        print(f.read())
-    ```
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
    """
 
    data: bytes | str | None = None
@@ -111,7 +111,7 @@ class Blob(BaseMedia):
    encoding: str = "utf-8"
    """Encoding to use if decoding the bytes into a string.
 
-    Use utf-8 as default encoding, if decoding to string.
+    Use `utf-8` as default encoding, if decoding to string.
    """
    path: PathLike | None = None
    """Location where the original content was found."""
@@ -127,7 +127,7 @@ class Blob(BaseMedia):
 
        If a path is associated with the blob, it will default to the path location.
 
-        Unless explicitly set via a metadata field called "source", in which
+        Unless explicitly set via a metadata field called `"source"`, in which
        case that value will be used instead.
        """
        if self.metadata and "source" in self.metadata:
@@ -211,11 +211,11 @@ class Blob(BaseMedia):
        """Load the blob from a path like object.
 
        Args:
-            path: path like object to file to be read
+            path: Path-like object to file to be read
            encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
+            mime_type: If provided, will be set as the mime-type of the data
            guess_type: If `True`, the mimetype will be guessed from the file extension,
-            if a mime-type was not provided
+                if a mime-type was not provided
            metadata: Metadata to associate with the blob
 
        Returns:
@@ -248,10 +248,10 @@ class Blob(BaseMedia):
        """Initialize the blob from in-memory data.
 
        Args:
-            data: the in-memory data associated with the blob
+            data: The in-memory data associated with the blob
            encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
-            path: if provided, will be set as the source from which the data came
+            mime_type: If provided, will be set as the mime-type of the data
+            path: If provided, will be set as the source from which the data came
            metadata: Metadata to associate with the blob
 
        Returns:
@@ -303,7 +303,7 @@ class Document(BaseMedia):
 
    @classmethod
    def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the langchain object.
+        """Get the namespace of the LangChain object.
 
        Returns:
            ["langchain", "schema", "document"]
langchain_core/embeddings/fake.py CHANGED
@@ -18,7 +18,8 @@ class FakeEmbeddings(Embeddings, BaseModel):
 
    This embedding model creates embeddings by sampling from a normal distribution.
 
-    Do not use this outside of testing, as it is not a real embedding model.
+    !!! warning
+        Do not use this outside of testing, as it is not a real embedding model.
 
    Instantiate:
        ```python
@@ -72,7 +73,8 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
    This embedding model creates embeddings by sampling from a normal distribution
    with a seed based on the hash of the text.
 
-    Do not use this outside of testing, as it is not a real embedding model.
+    !!! warning
+        Do not use this outside of testing, as it is not a real embedding model.
 
    Instantiate:
        ```python
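Both classes are drop-in `Embeddings` implementations intended for tests only, as the new warning admonitions stress. A short sketch of the difference between them (the size value is arbitrary):

```python
from langchain_core.embeddings import DeterministicFakeEmbedding, FakeEmbeddings

# Random vectors sampled from a normal distribution; new values on every call.
fake = FakeEmbeddings(size=256)
vector = fake.embed_query("hello")  # list[float] of length 256

# Seeded by a hash of the text, so equal inputs give equal vectors.
det = DeterministicFakeEmbedding(size=256)
assert det.embed_query("hello") == det.embed_query("hello")
```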