langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +23 -26
- langchain_core/_api/deprecation.py +52 -65
- langchain_core/_api/path.py +3 -6
- langchain_core/_import_utils.py +3 -4
- langchain_core/agents.py +19 -19
- langchain_core/caches.py +53 -63
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +323 -334
- langchain_core/callbacks/file.py +44 -44
- langchain_core/callbacks/manager.py +441 -507
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +32 -32
- langchain_core/callbacks/usage.py +60 -57
- langchain_core/chat_history.py +48 -63
- langchain_core/document_loaders/base.py +23 -23
- langchain_core/document_loaders/langsmith.py +37 -37
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +62 -65
- langchain_core/documents/compressor.py +4 -4
- langchain_core/documents/transformers.py +28 -29
- langchain_core/embeddings/fake.py +50 -54
- langchain_core/example_selectors/length_based.py +1 -1
- langchain_core/example_selectors/semantic_similarity.py +21 -25
- langchain_core/exceptions.py +10 -11
- langchain_core/globals.py +3 -151
- langchain_core/indexing/api.py +61 -66
- langchain_core/indexing/base.py +58 -58
- langchain_core/indexing/in_memory.py +3 -3
- langchain_core/language_models/__init__.py +14 -27
- langchain_core/language_models/_utils.py +270 -84
- langchain_core/language_models/base.py +55 -162
- langchain_core/language_models/chat_models.py +442 -402
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +61 -39
- langchain_core/language_models/llms.py +123 -231
- langchain_core/load/dump.py +4 -5
- langchain_core/load/load.py +18 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +39 -40
- langchain_core/messages/__init__.py +61 -22
- langchain_core/messages/ai.py +368 -163
- langchain_core/messages/base.py +214 -43
- langchain_core/messages/block_translators/__init__.py +111 -0
- langchain_core/messages/block_translators/anthropic.py +470 -0
- langchain_core/messages/block_translators/bedrock.py +94 -0
- langchain_core/messages/block_translators/bedrock_converse.py +297 -0
- langchain_core/messages/block_translators/google_genai.py +530 -0
- langchain_core/messages/block_translators/google_vertexai.py +21 -0
- langchain_core/messages/block_translators/groq.py +143 -0
- langchain_core/messages/block_translators/langchain_v0.py +301 -0
- langchain_core/messages/block_translators/openai.py +1010 -0
- langchain_core/messages/chat.py +2 -6
- langchain_core/messages/content.py +1423 -0
- langchain_core/messages/function.py +6 -10
- langchain_core/messages/human.py +41 -38
- langchain_core/messages/modifier.py +2 -2
- langchain_core/messages/system.py +38 -28
- langchain_core/messages/tool.py +96 -103
- langchain_core/messages/utils.py +478 -504
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +58 -61
- langchain_core/output_parsers/json.py +7 -8
- langchain_core/output_parsers/list.py +5 -7
- langchain_core/output_parsers/openai_functions.py +49 -47
- langchain_core/output_parsers/openai_tools.py +14 -19
- langchain_core/output_parsers/pydantic.py +12 -13
- langchain_core/output_parsers/string.py +2 -2
- langchain_core/output_parsers/transform.py +15 -17
- langchain_core/output_parsers/xml.py +8 -10
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +18 -18
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +8 -8
- langchain_core/outputs/llm_result.py +10 -10
- langchain_core/prompt_values.py +12 -12
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +45 -55
- langchain_core/prompts/chat.py +254 -313
- langchain_core/prompts/dict.py +5 -5
- langchain_core/prompts/few_shot.py +81 -88
- langchain_core/prompts/few_shot_with_templates.py +11 -13
- langchain_core/prompts/image.py +12 -14
- langchain_core/prompts/loading.py +6 -8
- langchain_core/prompts/message.py +3 -3
- langchain_core/prompts/prompt.py +24 -39
- langchain_core/prompts/string.py +4 -4
- langchain_core/prompts/structured.py +42 -50
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +49 -190
- langchain_core/runnables/base.py +1484 -1709
- langchain_core/runnables/branch.py +45 -61
- langchain_core/runnables/config.py +80 -88
- langchain_core/runnables/configurable.py +117 -134
- langchain_core/runnables/fallbacks.py +83 -79
- langchain_core/runnables/graph.py +85 -95
- langchain_core/runnables/graph_ascii.py +27 -28
- langchain_core/runnables/graph_mermaid.py +38 -50
- langchain_core/runnables/graph_png.py +15 -16
- langchain_core/runnables/history.py +135 -148
- langchain_core/runnables/passthrough.py +124 -150
- langchain_core/runnables/retry.py +46 -51
- langchain_core/runnables/router.py +25 -30
- langchain_core/runnables/schema.py +79 -74
- langchain_core/runnables/utils.py +62 -68
- langchain_core/stores.py +81 -115
- langchain_core/structured_query.py +8 -8
- langchain_core/sys_info.py +27 -29
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +179 -187
- langchain_core/tools/convert.py +131 -139
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +11 -11
- langchain_core/tools/simple.py +19 -24
- langchain_core/tools/structured.py +30 -39
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/base.py +97 -99
- langchain_core/tracers/context.py +29 -52
- langchain_core/tracers/core.py +50 -60
- langchain_core/tracers/evaluation.py +11 -11
- langchain_core/tracers/event_stream.py +115 -70
- langchain_core/tracers/langchain.py +21 -21
- langchain_core/tracers/log_stream.py +43 -43
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +16 -16
- langchain_core/tracers/run_collector.py +2 -4
- langchain_core/tracers/schemas.py +0 -129
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +46 -8
- langchain_core/utils/aiter.py +57 -61
- langchain_core/utils/env.py +9 -9
- langchain_core/utils/function_calling.py +89 -191
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +6 -6
- langchain_core/utils/interactive_env.py +1 -1
- langchain_core/utils/iter.py +37 -42
- langchain_core/utils/json.py +4 -3
- langchain_core/utils/json_schema.py +8 -8
- langchain_core/utils/mustache.py +9 -11
- langchain_core/utils/pydantic.py +33 -35
- langchain_core/utils/strings.py +5 -5
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +80 -54
- langchain_core/vectorstores/base.py +129 -164
- langchain_core/vectorstores/in_memory.py +99 -174
- langchain_core/vectorstores/utils.py +5 -5
- langchain_core/version.py +1 -1
- {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/METADATA +28 -27
- langchain_core-1.0.0.dist-info/RECORD +172 -0
- {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
- langchain_core/beta/__init__.py +0 -1
- langchain_core/beta/runnables/__init__.py +0 -1
- langchain_core/beta/runnables/context.py +0 -447
- langchain_core/memory.py +0 -120
- langchain_core/messages/content_blocks.py +0 -176
- langchain_core/prompts/pipeline.py +0 -138
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -31
- langchain_core/utils/loading.py +0 -35
- langchain_core-0.3.79.dist-info/RECORD +0 -174
- langchain_core-0.3.79.dist-info/entry_points.txt +0 -4
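A pattern that recurs in the rewritten signatures below is the switch to PEP 604 union syntax (`X | None`); the removed 0.3.x lines are truncated in this view, but presumably carried the older `Optional`/`Union` spellings. A minimal sketch, using hypothetical function names that are not part of langchain-core, contrasting the two styles:

```python
# Hypothetical functions illustrating the annotation style change seen in the
# 1.0.0 signatures below; both spellings describe the same types.
from __future__ import annotations

from typing import Optional, Union


def old_style(limit: Optional[int] = None, data: Union[str, bytes] = "") -> Optional[str]:
    # 0.3.x-era spelling using typing.Optional / typing.Union.
    return None if limit is None else str(data)


def new_style(limit: int | None = None, data: str | bytes = "") -> str | None:
    # PEP 604 spelling used throughout the 1.0.0 diff.
    return None if limit is None else str(data)


assert old_style(3, b"x") == new_style(3, b"x") == "b'x'"
```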
langchain_core/document_loaders/base.py CHANGED

@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 from langchain_core.runnables import run_in_executor
 
@@ -35,38 +35,38 @@ class BaseLoader(ABC): # noqa: B024
     # Sub-classes should not implement this method directly. Instead, they
     # should implement the lazy load method.
     def load(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.
 
         Returns:
-
+            The documents.
         """
         return list(self.lazy_load())
 
     async def aload(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.
 
         Returns:
-
+            The documents.
         """
         return [document async for document in self.alazy_load()]
 
     def load_and_split(
-        self, text_splitter:
+        self, text_splitter: TextSplitter | None = None
     ) -> list[Document]:
-        """Load Documents and split into chunks. Chunks are returned as
+        """Load Documents and split into chunks. Chunks are returned as `Document`.
 
         Do not override this method. It should be considered to be deprecated!
 
         Args:
-            text_splitter: TextSplitter instance to use for splitting documents.
-                Defaults to RecursiveCharacterTextSplitter
+            text_splitter: `TextSplitter` instance to use for splitting documents.
+                Defaults to `RecursiveCharacterTextSplitter`.
 
         Raises:
-            ImportError: If langchain-text-splitters is not installed
-                and no text_splitter is provided.
+            ImportError: If `langchain-text-splitters` is not installed
+                and no `text_splitter` is provided.
 
         Returns:
-            List of
+            List of `Document`.
         """
         if text_splitter is None:
             if not _HAS_TEXT_SPLITTERS:
@@ -86,10 +86,10 @@ class BaseLoader(ABC): # noqa: B024
     # Attention: This method will be upgraded into an abstractmethod once it's
     # implemented in all the existing subclasses.
     def lazy_load(self) -> Iterator[Document]:
-        """A lazy loader for
+        """A lazy loader for `Document`.
 
         Yields:
-
+            The `Document` objects.
         """
         if type(self).load != BaseLoader.load:
             return iter(self.load())
@@ -97,10 +97,10 @@
             raise NotImplementedError(msg)
 
     async def alazy_load(self) -> AsyncIterator[Document]:
-        """A lazy loader for
+        """A lazy loader for `Document`.
 
         Yields:
-
+            The `Document` objects.
         """
         iterator = await run_in_executor(None, self.lazy_load)
         done = object()
@@ -115,7 +115,7 @@ class BaseBlobParser(ABC):
     """Abstract interface for blob parsers.
 
     A blob parser provides a way to parse raw data stored in a blob into one
-    or more
+    or more `Document` objects.
 
     The parser can be composed with blob loaders, making it easy to reuse
     a parser independent of how the blob was originally loaded.
@@ -128,25 +128,25 @@ class BaseBlobParser(ABC):
         Subclasses are required to implement this method.
 
         Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
         Returns:
-            Generator of
+            Generator of `Document` objects
         """
 
     def parse(self, blob: Blob) -> list[Document]:
-        """Eagerly parse the blob into a
+        """Eagerly parse the blob into a `Document` or `Document` objects.
 
         This is a convenience method for interactive development environment.
 
-        Production applications should favor the lazy_parse method instead.
+        Production applications should favor the `lazy_parse` method instead.
 
         Subclasses should generally not over-ride this parse method.
 
         Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
         Returns:
-            List of
+            List of `Document` objects
         """
         return list(self.lazy_parse(blob))
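The `BaseLoader` hunks above keep the contract that `load()` simply materializes `lazy_load()` (`return list(self.lazy_load())`). A minimal sketch of a custom loader that relies on that default; `MemoryLoader` is a hypothetical name, not part of langchain-core:

```python
# Hypothetical in-memory loader: only lazy_load is implemented, and the
# inherited BaseLoader.load() shown above turns it into a list of Documents.
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class MemoryLoader(BaseLoader):
    def __init__(self, texts: list[str]) -> None:
        self.texts = texts

    def lazy_load(self) -> Iterator[Document]:
        # Yield one Document per input string, tagging its position.
        for i, text in enumerate(self.texts):
            yield Document(page_content=text, metadata={"index": i})


docs = MemoryLoader(["hello", "world"]).load()
assert [d.page_content for d in docs] == ["hello", "world"]
```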
langchain_core/document_loaders/langsmith.py CHANGED

@@ -3,8 +3,8 @@
 import datetime
 import json
 import uuid
-from collections.abc import Iterator, Sequence
-from typing import Any
+from collections.abc import Callable, Iterator, Sequence
+from typing import Any
 
 from langsmith import Client as LangSmithClient
 from typing_extensions import override
@@ -20,55 +20,55 @@ class LangSmithLoader(BaseLoader):
     into the Document metadata. This allows you to easily create few-shot example
     retrievers from the loaded documents.
 
-
+    ??? note "Lazy load"
 
-
+        ```python
+        from langchain_core.document_loaders import LangSmithLoader
 
-
+        loader = LangSmithLoader(dataset_id="...", limit=100)
+        docs = []
+        for doc in loader.lazy_load():
+            docs.append(doc)
+        ```
 
-
-
-
-            docs.append(doc)
+        ```python
+        # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
+        ```
 
-
+    !!! version-added "Added in version 0.2.34"
 
-
-
-    .. versionadded:: 0.2.34
-
-    """  # noqa: E501
+    """
 
     def __init__(
         self,
         *,
-        dataset_id:
-        dataset_name:
-        example_ids:
-        as_of:
-        splits:
+        dataset_id: uuid.UUID | str | None = None,
+        dataset_name: str | None = None,
+        example_ids: Sequence[uuid.UUID | str] | None = None,
+        as_of: datetime.datetime | str | None = None,
+        splits: Sequence[str] | None = None,
         inline_s3_urls: bool = True,
         offset: int = 0,
-        limit:
-        metadata:
-        filter:
+        limit: int | None = None,
+        metadata: dict | None = None,
+        filter: str | None = None,  # noqa: A002
         content_key: str = "",
-        format_content:
-        client:
+        format_content: Callable[..., str] | None = None,
+        client: LangSmithClient | None = None,
         **client_kwargs: Any,
     ) -> None:
         """Create a LangSmith loader.
 
         Args:
-            dataset_id: The ID of the dataset to filter by.
-            dataset_name: The name of the dataset to filter by.
-            content_key: The inputs key to set as Document page content.
-                are interpreted as nested keys. E.g.
+            dataset_id: The ID of the dataset to filter by.
+            dataset_name: The name of the dataset to filter by.
+            content_key: The inputs key to set as Document page content. `'.'` characters
+                are interpreted as nested keys. E.g. `content_key="first.second"` will
                 result in
-
+                `Document(page_content=format_content(example.inputs["first"]["second"]))`
             format_content: Function for converting the content extracted from the example
                 inputs into a string. Defaults to JSON-encoding the contents.
-            example_ids: The IDs of the examples to filter by.
+            example_ids: The IDs of the examples to filter by.
             as_of: The dataset version tag OR
                 timestamp to retrieve the examples as of.
                 Response examples will only be those that were present at the time
@@ -76,17 +76,17 @@ class LangSmithLoader(BaseLoader):
             splits: A list of dataset splits, which are
                 divisions of your dataset such as 'train', 'test', or 'validation'.
                 Returns examples only from the specified splits.
-            inline_s3_urls: Whether to inline S3 URLs.
-            offset: The offset to start from.
+            inline_s3_urls: Whether to inline S3 URLs.
+            offset: The offset to start from.
             limit: The maximum number of examples to return.
-            metadata: Metadata to filter by.
+            metadata: Metadata to filter by.
             filter: A structured filter string to apply to the examples.
             client: LangSmith Client. If not provided will be initialized from below args.
             client_kwargs: Keyword args to pass to LangSmith client init. Should only be
-                specified if
+                specified if `client` isn't.
 
         Raises:
-            ValueError: If both
+            ValueError: If both `client` and `client_kwargs` are provided.
         """  # noqa: E501
         if client and client_kwargs:
             raise ValueError
@@ -129,7 +129,7 @@ class LangSmithLoader(BaseLoader):
             yield Document(content_str, metadata=metadata)
 
 
-def _stringify(x:
+def _stringify(x: str | dict) -> str:
     if isinstance(x, str):
         return x
     try:
langchain_core/documents/base.py CHANGED

@@ -6,7 +6,7 @@ import contextlib
 import mimetypes
 from io import BufferedReader, BytesIO
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING, Any, Literal,
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 from pydantic import ConfigDict, Field, model_validator
 
@@ -15,7 +15,7 @@ from langchain_core.load.serializable import Serializable
 if TYPE_CHECKING:
     from collections.abc import Generator
 
-PathLike =
+PathLike = str | PurePath
 
 
 class BaseMedia(Serializable):
@@ -33,13 +33,13 @@ class BaseMedia(Serializable):
     # The ID field is optional at the moment.
     # It will likely become required in a future major release after
     # it has been adopted by enough vectorstore implementations.
-    id:
+    id: str | None = Field(default=None, coerce_numbers_to_str=True)
     """An optional identifier for the document.
 
     Ideally this should be unique across the document collection and formatted
     as a UUID, but this will not be enforced.
 
-
+    !!! version-added "Added in version 0.2.11"
     """
 
     metadata: dict = Field(default_factory=dict)
@@ -57,64 +57,63 @@ class Blob(BaseMedia):
 
     Example: Initialize a blob from in-memory data
 
-
+        ```python
+        from langchain_core.documents import Blob
 
-
+        blob = Blob.from_data("Hello, world!")
 
-
+        # Read the blob as a string
+        print(blob.as_string())
 
-
-
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-
-
-
-
-        with blob.as_bytes_io() as f:
-            print(f.read())
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
 
     Example: Load from memory and specify mime-type and metadata
 
-
-
-        from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-
-
-
-
-
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```
 
     Example: Load the blob from a file
 
-
-
-        from langchain_core.documents import Blob
-
-        blob = Blob.from_path("path/to/file.txt")
+        ```python
+        from langchain_core.documents import Blob
 
-
-        print(blob.as_string())
+        blob = Blob.from_path("path/to/file.txt")
 
-
-
+        # Read the blob as a string
+        print(blob.as_string())
 
-
-
-        print(f.read())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
     """
 
-    data:
+    data: bytes | str | None = None
     """Raw data associated with the blob."""
-    mimetype:
+    mimetype: str | None = None
     """MimeType not to be confused with a file extension."""
     encoding: str = "utf-8"
    """Encoding to use if decoding the bytes into a string.
 
-    Use utf-8 as default encoding, if decoding to string.
+    Use `utf-8` as default encoding, if decoding to string.
     """
-    path:
+    path: PathLike | None = None
     """Location where the original content was found."""
 
     model_config = ConfigDict(
@@ -123,16 +122,16 @@ class Blob(BaseMedia):
     )
 
     @property
-    def source(self) ->
+    def source(self) -> str | None:
         """The source location of the blob as string if known otherwise none.
 
         If a path is associated with the blob, it will default to the path location.
 
-        Unless explicitly set via a metadata field called "source"
+        Unless explicitly set via a metadata field called `"source"`, in which
         case that value will be used instead.
         """
         if self.metadata and "source" in self.metadata:
-            return cast("
+            return cast("str | None", self.metadata["source"])
         return str(self.path) if self.path else None
 
     @model_validator(mode="before")
@@ -181,7 +180,7 @@ class Blob(BaseMedia):
             raise ValueError(msg)
 
     @contextlib.contextmanager
-    def as_bytes_io(self) -> Generator[
+    def as_bytes_io(self) -> Generator[BytesIO | BufferedReader, None, None]:
         """Read data as a byte stream.
 
         Raises:
@@ -205,18 +204,18 @@ class Blob(BaseMedia):
         path: PathLike,
         *,
         encoding: str = "utf-8",
-        mime_type:
+        mime_type: str | None = None,
         guess_type: bool = True,
-        metadata:
+        metadata: dict | None = None,
     ) -> Blob:
         """Load the blob from a path like object.
 
         Args:
-            path:
+            path: Path-like object to file to be read
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type:
-            guess_type: If True
-
+            mime_type: If provided, will be set as the mime-type of the data
+            guess_type: If `True`, the mimetype will be guessed from the file extension,
+                if a mime-type was not provided
             metadata: Metadata to associate with the blob
 
         Returns:
@@ -239,20 +238,20 @@ class Blob(BaseMedia):
     @classmethod
     def from_data(
         cls,
-        data:
+        data: str | bytes,
         *,
         encoding: str = "utf-8",
-        mime_type:
-        path:
-        metadata:
+        mime_type: str | None = None,
+        path: str | None = None,
+        metadata: dict | None = None,
     ) -> Blob:
         """Initialize the blob from in-memory data.
 
         Args:
-            data:
+            data: The in-memory data associated with the blob
            encoding: Encoding to use if decoding the bytes into a string
-            mime_type:
-            path:
+            mime_type: If provided, will be set as the mime-type of the data
+            path: If provided, will be set as the source from which the data came
             metadata: Metadata to associate with the blob
 
         Returns:
@@ -278,15 +277,13 @@ class Document(BaseMedia):
     """Class for storing a piece of text and associated metadata.
 
     Example:
+        ```python
+        from langchain_core.documents import Document
 
-
-
-
-
-        document = Document(
-            page_content="Hello, world!", metadata={"source": "https://example.com"}
-        )
-
+        document = Document(
+            page_content="Hello, world!", metadata={"source": "https://example.com"}
+        )
+        ```
     """
 
     page_content: str
@@ -306,7 +303,7 @@ class Document(BaseMedia):
 
     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the
+        """Get the namespace of the LangChain object.
 
         Returns:
             ["langchain", "schema", "document"]
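Two behaviours documented in the hunks above, shown as a short sketch (assuming langchain-core is installed): `Blob.source` prefers an explicit `metadata["source"]` over the blob's `path`, and a numeric `Document` id is coerced to a string by the `coerce_numbers_to_str=True` field shown on `BaseMedia.id`:

```python
from langchain_core.documents import Blob, Document

# Without a metadata "source", Blob.source falls back to the path.
blob = Blob.from_data("payload", path="data/raw.txt")
assert blob.source == "data/raw.txt"

# An explicit metadata "source" takes precedence over the path.
blob = Blob.from_data(
    "payload",
    path="data/raw.txt",
    metadata={"source": "https://example.com/raw"},
)
assert blob.source == "https://example.com/raw"

# Numeric ids are coerced to strings (coerce_numbers_to_str=True on the field).
doc = Document(page_content="Hello, world!", id=42)
assert doc.id == "42"
```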
langchain_core/documents/compressor.py CHANGED

@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 from pydantic import BaseModel
 
@@ -27,7 +27,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
 
     For example, one could re-rank the retrieved documents using an LLM.
 
-
+    !!! note
         Users should favor using a RunnableLambda instead of sub-classing from this
         interface.
 
@@ -38,7 +38,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
         self,
         documents: Sequence[Document],
         query: str,
-        callbacks:
+        callbacks: Callbacks | None = None,
     ) -> Sequence[Document]:
         """Compress retrieved documents given the query context.
 
@@ -56,7 +56,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
         self,
         documents: Sequence[Document],
         query: str,
-        callbacks:
+        callbacks: Callbacks | None = None,
     ) -> Sequence[Document]:
         """Async compress retrieved documents given the query context.
 
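The note added above steers users toward `RunnableLambda` rather than subclassing `BaseDocumentCompressor`. A minimal sketch of that style, assuming langchain-core is installed; the length-based filter is an arbitrary stand-in for a real compressor:

```python
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda


def keep_short_docs(documents: list[Document]) -> list[Document]:
    # Toy "compression": drop anything longer than 100 characters.
    return [doc for doc in documents if len(doc.page_content) <= 100]


compressor = RunnableLambda(keep_short_docs)
docs = [Document(page_content="short"), Document(page_content="x" * 500)]
assert len(compressor.invoke(docs)) == 1
```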
langchain_core/documents/transformers.py CHANGED

@@ -20,35 +20,34 @@ class BaseDocumentTransformer(ABC):
     sequence of transformed Documents.
 
     Example:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        ```python
+        class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
+            embeddings: Embeddings
+            similarity_fn: Callable = cosine_similarity
+            similarity_threshold: float = 0.95
+
+            class Config:
+                arbitrary_types_allowed = True
+
+            def transform_documents(
+                self, documents: Sequence[Document], **kwargs: Any
+            ) -> Sequence[Document]:
+                stateful_documents = get_stateful_documents(documents)
+                embedded_documents = _get_embeddings_from_stateful_docs(
+                    self.embeddings, stateful_documents
+                )
+                included_idxs = _filter_similar_embeddings(
+                    embedded_documents,
+                    self.similarity_fn,
+                    self.similarity_threshold,
+                )
+                return [stateful_documents[i] for i in sorted(included_idxs)]
+
+            async def atransform_documents(
+                self, documents: Sequence[Document], **kwargs: Any
+            ) -> Sequence[Document]:
+                raise NotImplementedError
+        ```
     """
 
     @abstractmethod