langchain 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/_api/module_import.py +3 -3
- langchain/agents/agent.py +104 -109
- langchain/agents/agent_iterator.py +11 -15
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +2 -2
- langchain/agents/agent_toolkits/vectorstore/base.py +3 -3
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +4 -6
- langchain/agents/chat/base.py +7 -6
- langchain/agents/chat/output_parser.py +2 -1
- langchain/agents/conversational/base.py +5 -4
- langchain/agents/conversational_chat/base.py +9 -8
- langchain/agents/format_scratchpad/log.py +1 -3
- langchain/agents/format_scratchpad/log_to_messages.py +3 -5
- langchain/agents/format_scratchpad/openai_functions.py +4 -4
- langchain/agents/format_scratchpad/tools.py +3 -3
- langchain/agents/format_scratchpad/xml.py +1 -3
- langchain/agents/initialize.py +2 -1
- langchain/agents/json_chat/base.py +3 -2
- langchain/agents/loading.py +5 -5
- langchain/agents/mrkl/base.py +6 -5
- langchain/agents/openai_assistant/base.py +13 -17
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +6 -6
- langchain/agents/openai_functions_agent/base.py +13 -12
- langchain/agents/openai_functions_multi_agent/base.py +15 -14
- langchain/agents/openai_tools/base.py +2 -1
- langchain/agents/output_parsers/openai_functions.py +2 -2
- langchain/agents/output_parsers/openai_tools.py +6 -6
- langchain/agents/output_parsers/react_json_single_input.py +2 -1
- langchain/agents/output_parsers/self_ask.py +2 -1
- langchain/agents/output_parsers/tools.py +7 -7
- langchain/agents/react/agent.py +3 -2
- langchain/agents/react/base.py +4 -3
- langchain/agents/schema.py +3 -3
- langchain/agents/self_ask_with_search/base.py +2 -1
- langchain/agents/structured_chat/base.py +9 -8
- langchain/agents/structured_chat/output_parser.py +2 -1
- langchain/agents/tool_calling_agent/base.py +3 -2
- langchain/agents/tools.py +4 -4
- langchain/agents/types.py +3 -3
- langchain/agents/utils.py +1 -1
- langchain/agents/xml/base.py +7 -6
- langchain/callbacks/streaming_aiter.py +3 -2
- langchain/callbacks/streaming_aiter_final_only.py +3 -3
- langchain/callbacks/streaming_stdout_final_only.py +3 -3
- langchain/chains/api/base.py +11 -12
- langchain/chains/base.py +47 -50
- langchain/chains/combine_documents/base.py +23 -23
- langchain/chains/combine_documents/map_reduce.py +12 -12
- langchain/chains/combine_documents/map_rerank.py +16 -15
- langchain/chains/combine_documents/reduce.py +17 -17
- langchain/chains/combine_documents/refine.py +12 -12
- langchain/chains/combine_documents/stuff.py +10 -10
- langchain/chains/constitutional_ai/base.py +9 -9
- langchain/chains/conversation/base.py +2 -4
- langchain/chains/conversational_retrieval/base.py +30 -30
- langchain/chains/elasticsearch_database/base.py +13 -13
- langchain/chains/example_generator.py +1 -3
- langchain/chains/flare/base.py +13 -12
- langchain/chains/flare/prompts.py +2 -4
- langchain/chains/hyde/base.py +8 -8
- langchain/chains/llm.py +31 -30
- langchain/chains/llm_checker/base.py +6 -6
- langchain/chains/llm_math/base.py +10 -10
- langchain/chains/llm_summarization_checker/base.py +6 -6
- langchain/chains/loading.py +12 -14
- langchain/chains/mapreduce.py +7 -6
- langchain/chains/moderation.py +8 -8
- langchain/chains/natbot/base.py +6 -6
- langchain/chains/openai_functions/base.py +8 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +4 -4
- langchain/chains/openai_functions/extraction.py +3 -3
- langchain/chains/openai_functions/openapi.py +12 -12
- langchain/chains/openai_functions/qa_with_structure.py +4 -4
- langchain/chains/openai_functions/utils.py +2 -2
- langchain/chains/openai_tools/extraction.py +2 -2
- langchain/chains/prompt_selector.py +3 -3
- langchain/chains/qa_generation/base.py +5 -5
- langchain/chains/qa_with_sources/base.py +21 -21
- langchain/chains/qa_with_sources/loading.py +2 -1
- langchain/chains/qa_with_sources/retrieval.py +6 -6
- langchain/chains/qa_with_sources/vector_db.py +8 -8
- langchain/chains/query_constructor/base.py +4 -3
- langchain/chains/query_constructor/parser.py +5 -4
- langchain/chains/question_answering/chain.py +3 -2
- langchain/chains/retrieval.py +2 -2
- langchain/chains/retrieval_qa/base.py +16 -16
- langchain/chains/router/base.py +12 -11
- langchain/chains/router/embedding_router.py +12 -11
- langchain/chains/router/llm_router.py +12 -12
- langchain/chains/router/multi_prompt.py +3 -3
- langchain/chains/router/multi_retrieval_qa.py +5 -4
- langchain/chains/sequential.py +18 -18
- langchain/chains/sql_database/query.py +4 -4
- langchain/chains/structured_output/base.py +14 -13
- langchain/chains/summarize/chain.py +4 -3
- langchain/chains/transform.py +12 -11
- langchain/chat_models/base.py +34 -31
- langchain/embeddings/__init__.py +1 -1
- langchain/embeddings/base.py +4 -4
- langchain/embeddings/cache.py +19 -18
- langchain/evaluation/agents/trajectory_eval_chain.py +16 -19
- langchain/evaluation/comparison/eval_chain.py +10 -10
- langchain/evaluation/criteria/eval_chain.py +11 -10
- langchain/evaluation/embedding_distance/base.py +21 -21
- langchain/evaluation/exact_match/base.py +3 -3
- langchain/evaluation/loading.py +7 -8
- langchain/evaluation/qa/eval_chain.py +7 -6
- langchain/evaluation/regex_match/base.py +3 -3
- langchain/evaluation/schema.py +6 -5
- langchain/evaluation/scoring/eval_chain.py +9 -9
- langchain/evaluation/string_distance/base.py +23 -23
- langchain/hub.py +2 -1
- langchain/indexes/_sql_record_manager.py +8 -7
- langchain/indexes/vectorstore.py +11 -11
- langchain/llms/__init__.py +3 -3
- langchain/memory/buffer.py +13 -13
- langchain/memory/buffer_window.py +5 -5
- langchain/memory/chat_memory.py +5 -5
- langchain/memory/combined.py +10 -10
- langchain/memory/entity.py +8 -7
- langchain/memory/readonly.py +4 -4
- langchain/memory/simple.py +5 -5
- langchain/memory/summary.py +8 -8
- langchain/memory/summary_buffer.py +11 -11
- langchain/memory/token_buffer.py +5 -5
- langchain/memory/utils.py +2 -2
- langchain/memory/vectorstore.py +15 -14
- langchain/memory/vectorstore_token_buffer_memory.py +7 -7
- langchain/model_laboratory.py +4 -3
- langchain/output_parsers/combining.py +5 -5
- langchain/output_parsers/datetime.py +1 -2
- langchain/output_parsers/enum.py +4 -5
- langchain/output_parsers/pandas_dataframe.py +5 -5
- langchain/output_parsers/regex.py +4 -4
- langchain/output_parsers/regex_dict.py +4 -4
- langchain/output_parsers/retry.py +2 -2
- langchain/output_parsers/structured.py +5 -5
- langchain/output_parsers/yaml.py +3 -3
- langchain/pydantic_v1/__init__.py +1 -6
- langchain/pydantic_v1/dataclasses.py +1 -5
- langchain/pydantic_v1/main.py +1 -5
- langchain/retrievers/contextual_compression.py +3 -3
- langchain/retrievers/document_compressors/base.py +3 -2
- langchain/retrievers/document_compressors/chain_extract.py +4 -3
- langchain/retrievers/document_compressors/chain_filter.py +3 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +4 -3
- langchain/retrievers/document_compressors/cross_encoder.py +1 -2
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -1
- langchain/retrievers/document_compressors/embeddings_filter.py +3 -2
- langchain/retrievers/document_compressors/listwise_rerank.py +6 -5
- langchain/retrievers/ensemble.py +15 -19
- langchain/retrievers/merger_retriever.py +7 -12
- langchain/retrievers/multi_query.py +14 -13
- langchain/retrievers/multi_vector.py +4 -4
- langchain/retrievers/parent_document_retriever.py +9 -8
- langchain/retrievers/re_phraser.py +2 -3
- langchain/retrievers/self_query/base.py +13 -12
- langchain/retrievers/time_weighted_retriever.py +14 -14
- langchain/runnables/openai_functions.py +4 -3
- langchain/smith/evaluation/config.py +7 -6
- langchain/smith/evaluation/progress.py +3 -2
- langchain/smith/evaluation/runner_utils.py +58 -61
- langchain/smith/evaluation/string_run_evaluator.py +29 -29
- langchain/storage/encoder_backed.py +7 -11
- langchain/storage/file_system.py +5 -4
- {langchain-0.3.22.dist-info → langchain-0.3.24.dist-info}/METADATA +5 -3
- {langchain-0.3.22.dist-info → langchain-0.3.24.dist-info}/RECORD +169 -169
- {langchain-0.3.22.dist-info → langchain-0.3.24.dist-info}/WHEEL +1 -1
- langchain-0.3.24.dist-info/entry_points.txt +4 -0
- langchain-0.3.22.dist-info/entry_points.txt +0 -5
- {langchain-0.3.22.dist-info → langchain-0.3.24.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Filter that uses an LLM to rerank documents listwise and select top-k."""
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import Any, Optional
|
|
4
5
|
|
|
5
6
|
from langchain_core.callbacks import Callbacks
|
|
6
7
|
from langchain_core.documents import BaseDocumentCompressor, Document
|
|
@@ -17,7 +18,7 @@ _DEFAULT_PROMPT = ChatPromptTemplate.from_messages(
|
|
|
17
18
|
)
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
def _get_prompt_input(input_: dict) ->
|
|
21
|
+
def _get_prompt_input(input_: dict) -> dict[str, Any]:
|
|
21
22
|
"""Return the compression chain input."""
|
|
22
23
|
documents = input_["documents"]
|
|
23
24
|
context = ""
|
|
@@ -27,7 +28,7 @@ def _get_prompt_input(input_: dict) -> Dict[str, Any]:
|
|
|
27
28
|
return {"query": input_["query"], "context": context}
|
|
28
29
|
|
|
29
30
|
|
|
30
|
-
def _parse_ranking(results: dict) ->
|
|
31
|
+
def _parse_ranking(results: dict) -> list[Document]:
|
|
31
32
|
ranking = results["ranking"]
|
|
32
33
|
docs = results["documents"]
|
|
33
34
|
return [docs[i] for i in ranking.ranked_document_ids]
|
|
@@ -68,7 +69,7 @@ class LLMListwiseRerank(BaseDocumentCompressor):
|
|
|
68
69
|
assert "Steve" in compressed_docs[0].page_content
|
|
69
70
|
"""
|
|
70
71
|
|
|
71
|
-
reranker: Runnable[
|
|
72
|
+
reranker: Runnable[dict, list[Document]]
|
|
72
73
|
"""LLM-based reranker to use for filtering documents. Expected to take in a dict
|
|
73
74
|
with 'documents: Sequence[Document]' and 'query: str' keys and output a
|
|
74
75
|
List[Document]."""
|
|
@@ -121,7 +122,7 @@ class LLMListwiseRerank(BaseDocumentCompressor):
|
|
|
121
122
|
"""Rank the documents by their relevance to the user question.
|
|
122
123
|
Rank from most to least relevant."""
|
|
123
124
|
|
|
124
|
-
ranked_document_ids:
|
|
125
|
+
ranked_document_ids: list[int] = Field(
|
|
125
126
|
...,
|
|
126
127
|
description=(
|
|
127
128
|
"The integer IDs of the documents, sorted from most to least "
|
langchain/retrievers/ensemble.py
CHANGED
|
@@ -5,15 +5,11 @@ multiple retrievers by using weighted Reciprocal Rank Fusion
|
|
|
5
5
|
|
|
6
6
|
import asyncio
|
|
7
7
|
from collections import defaultdict
|
|
8
|
-
from collections.abc import Hashable
|
|
8
|
+
from collections.abc import Hashable, Iterable, Iterator
|
|
9
9
|
from itertools import chain
|
|
10
10
|
from typing import (
|
|
11
11
|
Any,
|
|
12
12
|
Callable,
|
|
13
|
-
Dict,
|
|
14
|
-
Iterable,
|
|
15
|
-
Iterator,
|
|
16
|
-
List,
|
|
17
13
|
Optional,
|
|
18
14
|
TypeVar,
|
|
19
15
|
cast,
|
|
@@ -70,13 +66,13 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
70
66
|
If not specified, page_content is used.
|
|
71
67
|
"""
|
|
72
68
|
|
|
73
|
-
retrievers:
|
|
74
|
-
weights:
|
|
69
|
+
retrievers: list[RetrieverLike]
|
|
70
|
+
weights: list[float]
|
|
75
71
|
c: int = 60
|
|
76
72
|
id_key: Optional[str] = None
|
|
77
73
|
|
|
78
74
|
@property
|
|
79
|
-
def config_specs(self) ->
|
|
75
|
+
def config_specs(self) -> list[ConfigurableFieldSpec]:
|
|
80
76
|
"""List configurable fields for this runnable."""
|
|
81
77
|
return get_unique_config_specs(
|
|
82
78
|
spec for retriever in self.retrievers for spec in retriever.config_specs
|
|
@@ -84,7 +80,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
84
80
|
|
|
85
81
|
@model_validator(mode="before")
|
|
86
82
|
@classmethod
|
|
87
|
-
def set_weights(cls, values:
|
|
83
|
+
def set_weights(cls, values: dict[str, Any]) -> Any:
|
|
88
84
|
if not values.get("weights"):
|
|
89
85
|
n_retrievers = len(values["retrievers"])
|
|
90
86
|
values["weights"] = [1 / n_retrievers] * n_retrievers
|
|
@@ -92,7 +88,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
92
88
|
|
|
93
89
|
def invoke(
|
|
94
90
|
self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
|
|
95
|
-
) ->
|
|
91
|
+
) -> list[Document]:
|
|
96
92
|
from langchain_core.callbacks import CallbackManager
|
|
97
93
|
|
|
98
94
|
config = ensure_config(config)
|
|
@@ -125,7 +121,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
125
121
|
|
|
126
122
|
async def ainvoke(
|
|
127
123
|
self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
|
|
128
|
-
) ->
|
|
124
|
+
) -> list[Document]:
|
|
129
125
|
from langchain_core.callbacks import AsyncCallbackManager
|
|
130
126
|
|
|
131
127
|
config = ensure_config(config)
|
|
@@ -163,7 +159,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
163
159
|
query: str,
|
|
164
160
|
*,
|
|
165
161
|
run_manager: CallbackManagerForRetrieverRun,
|
|
166
|
-
) ->
|
|
162
|
+
) -> list[Document]:
|
|
167
163
|
"""
|
|
168
164
|
Get the relevant documents for a given query.
|
|
169
165
|
|
|
@@ -184,7 +180,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
184
180
|
query: str,
|
|
185
181
|
*,
|
|
186
182
|
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
187
|
-
) ->
|
|
183
|
+
) -> list[Document]:
|
|
188
184
|
"""
|
|
189
185
|
Asynchronously get the relevant documents for a given query.
|
|
190
186
|
|
|
@@ -206,7 +202,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
206
202
|
run_manager: CallbackManagerForRetrieverRun,
|
|
207
203
|
*,
|
|
208
204
|
config: Optional[RunnableConfig] = None,
|
|
209
|
-
) ->
|
|
205
|
+
) -> list[Document]:
|
|
210
206
|
"""
|
|
211
207
|
Retrieve the results of the retrievers and use rank_fusion_func to get
|
|
212
208
|
the final result.
|
|
@@ -247,7 +243,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
247
243
|
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
248
244
|
*,
|
|
249
245
|
config: Optional[RunnableConfig] = None,
|
|
250
|
-
) ->
|
|
246
|
+
) -> list[Document]:
|
|
251
247
|
"""
|
|
252
248
|
Asynchronously retrieve the results of the retrievers
|
|
253
249
|
and use rank_fusion_func to get the final result.
|
|
@@ -276,7 +272,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
276
272
|
# Enforce that retrieved docs are Documents for each list in retriever_docs
|
|
277
273
|
for i in range(len(retriever_docs)):
|
|
278
274
|
retriever_docs[i] = [
|
|
279
|
-
Document(page_content=doc) if not isinstance(doc, Document) else doc
|
|
275
|
+
Document(page_content=doc) if not isinstance(doc, Document) else doc
|
|
280
276
|
for doc in retriever_docs[i]
|
|
281
277
|
]
|
|
282
278
|
|
|
@@ -286,8 +282,8 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
286
282
|
return fused_documents
|
|
287
283
|
|
|
288
284
|
def weighted_reciprocal_rank(
|
|
289
|
-
self, doc_lists:
|
|
290
|
-
) ->
|
|
285
|
+
self, doc_lists: list[list[Document]]
|
|
286
|
+
) -> list[Document]:
|
|
291
287
|
"""
|
|
292
288
|
Perform weighted Reciprocal Rank Fusion on multiple rank lists.
|
|
293
289
|
You can find more details about RRF here:
|
|
@@ -307,7 +303,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
|
307
303
|
|
|
308
304
|
# Associate each doc's content with its RRF score for later sorting by it
|
|
309
305
|
# Duplicated contents across retrievers are collapsed & scored cumulatively
|
|
310
|
-
rrf_score:
|
|
306
|
+
rrf_score: dict[str, float] = defaultdict(float)
|
|
311
307
|
for doc_list, weight in zip(doc_lists, self.weights):
|
|
312
308
|
for rank, doc in enumerate(doc_list, start=1):
|
|
313
309
|
rrf_score[
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import List
|
|
3
2
|
|
|
4
3
|
from langchain_core.callbacks import (
|
|
5
4
|
AsyncCallbackManagerForRetrieverRun,
|
|
@@ -12,7 +11,7 @@ from langchain_core.retrievers import BaseRetriever
|
|
|
12
11
|
class MergerRetriever(BaseRetriever):
|
|
13
12
|
"""Retriever that merges the results of multiple retrievers."""
|
|
14
13
|
|
|
15
|
-
retrievers:
|
|
14
|
+
retrievers: list[BaseRetriever]
|
|
16
15
|
"""A list of retrievers to merge."""
|
|
17
16
|
|
|
18
17
|
def _get_relevant_documents(
|
|
@@ -20,7 +19,7 @@ class MergerRetriever(BaseRetriever):
|
|
|
20
19
|
query: str,
|
|
21
20
|
*,
|
|
22
21
|
run_manager: CallbackManagerForRetrieverRun,
|
|
23
|
-
) ->
|
|
22
|
+
) -> list[Document]:
|
|
24
23
|
"""
|
|
25
24
|
Get the relevant documents for a given query.
|
|
26
25
|
|
|
@@ -41,7 +40,7 @@ class MergerRetriever(BaseRetriever):
|
|
|
41
40
|
query: str,
|
|
42
41
|
*,
|
|
43
42
|
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
44
|
-
) ->
|
|
43
|
+
) -> list[Document]:
|
|
45
44
|
"""
|
|
46
45
|
Asynchronously get the relevant documents for a given query.
|
|
47
46
|
|
|
@@ -59,7 +58,7 @@ class MergerRetriever(BaseRetriever):
|
|
|
59
58
|
|
|
60
59
|
def merge_documents(
|
|
61
60
|
self, query: str, run_manager: CallbackManagerForRetrieverRun
|
|
62
|
-
) ->
|
|
61
|
+
) -> list[Document]:
|
|
63
62
|
"""
|
|
64
63
|
Merge the results of the retrievers.
|
|
65
64
|
|
|
@@ -74,9 +73,7 @@ class MergerRetriever(BaseRetriever):
|
|
|
74
73
|
retriever_docs = [
|
|
75
74
|
retriever.invoke(
|
|
76
75
|
query,
|
|
77
|
-
config={
|
|
78
|
-
"callbacks": run_manager.get_child("retriever_{}".format(i + 1))
|
|
79
|
-
},
|
|
76
|
+
config={"callbacks": run_manager.get_child(f"retriever_{i + 1}")},
|
|
80
77
|
)
|
|
81
78
|
for i, retriever in enumerate(self.retrievers)
|
|
82
79
|
]
|
|
@@ -93,7 +90,7 @@ class MergerRetriever(BaseRetriever):
|
|
|
93
90
|
|
|
94
91
|
async def amerge_documents(
|
|
95
92
|
self, query: str, run_manager: AsyncCallbackManagerForRetrieverRun
|
|
96
|
-
) ->
|
|
93
|
+
) -> list[Document]:
|
|
97
94
|
"""
|
|
98
95
|
Asynchronously merge the results of the retrievers.
|
|
99
96
|
|
|
@@ -109,9 +106,7 @@ class MergerRetriever(BaseRetriever):
|
|
|
109
106
|
*(
|
|
110
107
|
retriever.ainvoke(
|
|
111
108
|
query,
|
|
112
|
-
config={
|
|
113
|
-
"callbacks": run_manager.get_child("retriever_{}".format(i + 1))
|
|
114
|
-
},
|
|
109
|
+
config={"callbacks": run_manager.get_child(f"retriever_{i + 1}")},
|
|
115
110
|
)
|
|
116
111
|
for i, retriever in enumerate(self.retrievers)
|
|
117
112
|
)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import logging
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
from langchain_core.callbacks import (
|
|
6
7
|
AsyncCallbackManagerForRetrieverRun,
|
|
@@ -19,10 +20,10 @@ from langchain.chains.llm import LLMChain
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
21
22
|
|
|
22
|
-
class LineListOutputParser(BaseOutputParser[
|
|
23
|
+
class LineListOutputParser(BaseOutputParser[list[str]]):
|
|
23
24
|
"""Output parser for a list of lines."""
|
|
24
25
|
|
|
25
|
-
def parse(self, text: str) ->
|
|
26
|
+
def parse(self, text: str) -> list[str]:
|
|
26
27
|
lines = text.strip().split("\n")
|
|
27
28
|
return list(filter(None, lines)) # Remove empty lines
|
|
28
29
|
|
|
@@ -40,7 +41,7 @@ DEFAULT_QUERY_PROMPT = PromptTemplate(
|
|
|
40
41
|
)
|
|
41
42
|
|
|
42
43
|
|
|
43
|
-
def _unique_documents(documents: Sequence[Document]) ->
|
|
44
|
+
def _unique_documents(documents: Sequence[Document]) -> list[Document]:
|
|
44
45
|
return [doc for i, doc in enumerate(documents) if doc not in documents[:i]]
|
|
45
46
|
|
|
46
47
|
|
|
@@ -93,7 +94,7 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
93
94
|
query: str,
|
|
94
95
|
*,
|
|
95
96
|
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
96
|
-
) ->
|
|
97
|
+
) -> list[Document]:
|
|
97
98
|
"""Get relevant documents given a user query.
|
|
98
99
|
|
|
99
100
|
Args:
|
|
@@ -110,7 +111,7 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
110
111
|
|
|
111
112
|
async def agenerate_queries(
|
|
112
113
|
self, question: str, run_manager: AsyncCallbackManagerForRetrieverRun
|
|
113
|
-
) ->
|
|
114
|
+
) -> list[str]:
|
|
114
115
|
"""Generate queries based upon user input.
|
|
115
116
|
|
|
116
117
|
Args:
|
|
@@ -131,8 +132,8 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
131
132
|
return lines
|
|
132
133
|
|
|
133
134
|
async def aretrieve_documents(
|
|
134
|
-
self, queries:
|
|
135
|
-
) ->
|
|
135
|
+
self, queries: list[str], run_manager: AsyncCallbackManagerForRetrieverRun
|
|
136
|
+
) -> list[Document]:
|
|
136
137
|
"""Run all LLM generated queries.
|
|
137
138
|
|
|
138
139
|
Args:
|
|
@@ -156,7 +157,7 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
156
157
|
query: str,
|
|
157
158
|
*,
|
|
158
159
|
run_manager: CallbackManagerForRetrieverRun,
|
|
159
|
-
) ->
|
|
160
|
+
) -> list[Document]:
|
|
160
161
|
"""Get relevant documents given a user query.
|
|
161
162
|
|
|
162
163
|
Args:
|
|
@@ -173,7 +174,7 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
173
174
|
|
|
174
175
|
def generate_queries(
|
|
175
176
|
self, question: str, run_manager: CallbackManagerForRetrieverRun
|
|
176
|
-
) ->
|
|
177
|
+
) -> list[str]:
|
|
177
178
|
"""Generate queries based upon user input.
|
|
178
179
|
|
|
179
180
|
Args:
|
|
@@ -194,8 +195,8 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
194
195
|
return lines
|
|
195
196
|
|
|
196
197
|
def retrieve_documents(
|
|
197
|
-
self, queries:
|
|
198
|
-
) ->
|
|
198
|
+
self, queries: list[str], run_manager: CallbackManagerForRetrieverRun
|
|
199
|
+
) -> list[Document]:
|
|
199
200
|
"""Run all LLM generated queries.
|
|
200
201
|
|
|
201
202
|
Args:
|
|
@@ -212,7 +213,7 @@ class MultiQueryRetriever(BaseRetriever):
|
|
|
212
213
|
documents.extend(docs)
|
|
213
214
|
return documents
|
|
214
215
|
|
|
215
|
-
def unique_union(self, documents:
|
|
216
|
+
def unique_union(self, documents: list[Document]) -> list[Document]:
|
|
216
217
|
"""Get unique Documents.
|
|
217
218
|
|
|
218
219
|
Args:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import Any,
|
|
2
|
+
from typing import Any, Optional
|
|
3
3
|
|
|
4
4
|
from langchain_core.callbacks import (
|
|
5
5
|
AsyncCallbackManagerForRetrieverRun,
|
|
@@ -43,7 +43,7 @@ class MultiVectorRetriever(BaseRetriever):
|
|
|
43
43
|
|
|
44
44
|
@model_validator(mode="before")
|
|
45
45
|
@classmethod
|
|
46
|
-
def shim_docstore(cls, values:
|
|
46
|
+
def shim_docstore(cls, values: dict) -> Any:
|
|
47
47
|
byte_store = values.get("byte_store")
|
|
48
48
|
docstore = values.get("docstore")
|
|
49
49
|
if byte_store is not None:
|
|
@@ -55,7 +55,7 @@ class MultiVectorRetriever(BaseRetriever):
|
|
|
55
55
|
|
|
56
56
|
def _get_relevant_documents(
|
|
57
57
|
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
|
58
|
-
) ->
|
|
58
|
+
) -> list[Document]:
|
|
59
59
|
"""Get documents relevant to a query.
|
|
60
60
|
Args:
|
|
61
61
|
query: String to find relevant documents for
|
|
@@ -87,7 +87,7 @@ class MultiVectorRetriever(BaseRetriever):
|
|
|
87
87
|
|
|
88
88
|
async def _aget_relevant_documents(
|
|
89
89
|
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
|
|
90
|
-
) ->
|
|
90
|
+
) -> list[Document]:
|
|
91
91
|
"""Asynchronously get documents relevant to a query.
|
|
92
92
|
Args:
|
|
93
93
|
query: String to find relevant documents for
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import uuid
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Sequence
|
|
3
|
+
from typing import Any, Optional
|
|
3
4
|
|
|
4
5
|
from langchain_core.documents import Document
|
|
5
6
|
from langchain_text_splitters import TextSplitter
|
|
@@ -71,10 +72,10 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
71
72
|
|
|
72
73
|
def _split_docs_for_adding(
|
|
73
74
|
self,
|
|
74
|
-
documents:
|
|
75
|
-
ids: Optional[
|
|
75
|
+
documents: list[Document],
|
|
76
|
+
ids: Optional[list[str]] = None,
|
|
76
77
|
add_to_docstore: bool = True,
|
|
77
|
-
) ->
|
|
78
|
+
) -> tuple[list[Document], list[tuple[str, Document]]]:
|
|
78
79
|
if self.parent_splitter is not None:
|
|
79
80
|
documents = self.parent_splitter.split_documents(documents)
|
|
80
81
|
if ids is None:
|
|
@@ -110,8 +111,8 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
110
111
|
|
|
111
112
|
def add_documents(
|
|
112
113
|
self,
|
|
113
|
-
documents:
|
|
114
|
-
ids: Optional[
|
|
114
|
+
documents: list[Document],
|
|
115
|
+
ids: Optional[list[str]] = None,
|
|
115
116
|
add_to_docstore: bool = True,
|
|
116
117
|
**kwargs: Any,
|
|
117
118
|
) -> None:
|
|
@@ -136,8 +137,8 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|
|
136
137
|
|
|
137
138
|
async def aadd_documents(
|
|
138
139
|
self,
|
|
139
|
-
documents:
|
|
140
|
-
ids: Optional[
|
|
140
|
+
documents: list[Document],
|
|
141
|
+
ids: Optional[list[str]] = None,
|
|
141
142
|
add_to_docstore: bool = True,
|
|
142
143
|
**kwargs: Any,
|
|
143
144
|
) -> None:
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import List
|
|
3
2
|
|
|
4
3
|
from langchain_core.callbacks import (
|
|
5
4
|
AsyncCallbackManagerForRetrieverRun,
|
|
@@ -62,7 +61,7 @@ class RePhraseQueryRetriever(BaseRetriever):
|
|
|
62
61
|
query: str,
|
|
63
62
|
*,
|
|
64
63
|
run_manager: CallbackManagerForRetrieverRun,
|
|
65
|
-
) ->
|
|
64
|
+
) -> list[Document]:
|
|
66
65
|
"""Get relevant documents given a user question.
|
|
67
66
|
|
|
68
67
|
Args:
|
|
@@ -85,5 +84,5 @@ class RePhraseQueryRetriever(BaseRetriever):
|
|
|
85
84
|
query: str,
|
|
86
85
|
*,
|
|
87
86
|
run_manager: AsyncCallbackManagerForRetrieverRun,
|
|
88
|
-
) ->
|
|
87
|
+
) -> list[Document]:
|
|
89
88
|
raise NotImplementedError
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Retriever that generates and executes structured queries over its own data source."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from typing import Any, Optional, Union
|
|
5
6
|
|
|
6
7
|
from langchain_core.callbacks.manager import (
|
|
7
8
|
AsyncCallbackManagerForRetrieverRun,
|
|
@@ -95,7 +96,7 @@ def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
|
|
|
95
96
|
Pinecone as CommunityPinecone,
|
|
96
97
|
)
|
|
97
98
|
|
|
98
|
-
BUILTIN_TRANSLATORS:
|
|
99
|
+
BUILTIN_TRANSLATORS: dict[type[VectorStore], type[Visitor]] = {
|
|
99
100
|
AstraDB: AstraDBTranslator,
|
|
100
101
|
PGVector: PGVectorTranslator,
|
|
101
102
|
CommunityPinecone: PineconeTranslator,
|
|
@@ -179,7 +180,7 @@ def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
|
|
|
179
180
|
return ChromaTranslator()
|
|
180
181
|
|
|
181
182
|
try:
|
|
182
|
-
from langchain_postgres import PGVector
|
|
183
|
+
from langchain_postgres import PGVector
|
|
183
184
|
from langchain_postgres import PGVectorTranslator as NewPGVectorTranslator
|
|
184
185
|
except ImportError:
|
|
185
186
|
pass
|
|
@@ -249,7 +250,7 @@ class SelfQueryRetriever(BaseRetriever):
|
|
|
249
250
|
|
|
250
251
|
@model_validator(mode="before")
|
|
251
252
|
@classmethod
|
|
252
|
-
def validate_translator(cls, values:
|
|
253
|
+
def validate_translator(cls, values: dict) -> Any:
|
|
253
254
|
"""Validate translator."""
|
|
254
255
|
if "structured_query_translator" not in values:
|
|
255
256
|
values["structured_query_translator"] = _get_builtin_translator(
|
|
@@ -264,7 +265,7 @@ class SelfQueryRetriever(BaseRetriever):
|
|
|
264
265
|
|
|
265
266
|
def _prepare_query(
|
|
266
267
|
self, query: str, structured_query: StructuredQuery
|
|
267
|
-
) ->
|
|
268
|
+
) -> tuple[str, dict[str, Any]]:
|
|
268
269
|
new_query, new_kwargs = self.structured_query_translator.visit_structured_query(
|
|
269
270
|
structured_query
|
|
270
271
|
)
|
|
@@ -276,20 +277,20 @@ class SelfQueryRetriever(BaseRetriever):
|
|
|
276
277
|
return new_query, search_kwargs
|
|
277
278
|
|
|
278
279
|
def _get_docs_with_query(
|
|
279
|
-
self, query: str, search_kwargs:
|
|
280
|
-
) ->
|
|
280
|
+
self, query: str, search_kwargs: dict[str, Any]
|
|
281
|
+
) -> list[Document]:
|
|
281
282
|
docs = self.vectorstore.search(query, self.search_type, **search_kwargs)
|
|
282
283
|
return docs
|
|
283
284
|
|
|
284
285
|
async def _aget_docs_with_query(
|
|
285
|
-
self, query: str, search_kwargs:
|
|
286
|
-
) ->
|
|
286
|
+
self, query: str, search_kwargs: dict[str, Any]
|
|
287
|
+
) -> list[Document]:
|
|
287
288
|
docs = await self.vectorstore.asearch(query, self.search_type, **search_kwargs)
|
|
288
289
|
return docs
|
|
289
290
|
|
|
290
291
|
def _get_relevant_documents(
|
|
291
292
|
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
|
292
|
-
) ->
|
|
293
|
+
) -> list[Document]:
|
|
293
294
|
"""Get documents relevant for a query.
|
|
294
295
|
|
|
295
296
|
Args:
|
|
@@ -309,7 +310,7 @@ class SelfQueryRetriever(BaseRetriever):
|
|
|
309
310
|
|
|
310
311
|
async def _aget_relevant_documents(
|
|
311
312
|
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
|
|
312
|
-
) ->
|
|
313
|
+
) -> list[Document]:
|
|
313
314
|
"""Get documents relevant for a query.
|
|
314
315
|
|
|
315
316
|
Args:
|
|
@@ -335,7 +336,7 @@ class SelfQueryRetriever(BaseRetriever):
|
|
|
335
336
|
document_contents: str,
|
|
336
337
|
metadata_field_info: Sequence[Union[AttributeInfo, dict]],
|
|
337
338
|
structured_query_translator: Optional[Visitor] = None,
|
|
338
|
-
chain_kwargs: Optional[
|
|
339
|
+
chain_kwargs: Optional[dict] = None,
|
|
339
340
|
enable_limit: bool = False,
|
|
340
341
|
use_original_query: bool = False,
|
|
341
342
|
**kwargs: Any,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
from copy import deepcopy
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
from langchain_core.callbacks import (
|
|
6
6
|
AsyncCallbackManagerForRetrieverRun,
|
|
@@ -28,7 +28,7 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
28
28
|
"""Keyword arguments to pass to the vectorstore similarity search."""
|
|
29
29
|
|
|
30
30
|
# TODO: abstract as a queue
|
|
31
|
-
memory_stream:
|
|
31
|
+
memory_stream: list[Document] = Field(default_factory=list)
|
|
32
32
|
"""The memory_stream of documents to search through."""
|
|
33
33
|
|
|
34
34
|
decay_rate: float = Field(default=0.01)
|
|
@@ -37,7 +37,7 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
37
37
|
k: int = 4
|
|
38
38
|
"""The maximum number of documents to retrieve in a given call."""
|
|
39
39
|
|
|
40
|
-
other_score_keys:
|
|
40
|
+
other_score_keys: list[str] = []
|
|
41
41
|
"""Other keys in the metadata to factor into the score, e.g. 'importance'."""
|
|
42
42
|
|
|
43
43
|
default_salience: Optional[float] = None
|
|
@@ -77,9 +77,9 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
77
77
|
score += vector_relevance
|
|
78
78
|
return score
|
|
79
79
|
|
|
80
|
-
def get_salient_docs(self, query: str) ->
|
|
80
|
+
def get_salient_docs(self, query: str) -> dict[int, tuple[Document, float]]:
|
|
81
81
|
"""Return documents that are salient to the query."""
|
|
82
|
-
docs_and_scores:
|
|
82
|
+
docs_and_scores: list[tuple[Document, float]]
|
|
83
83
|
docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
|
|
84
84
|
query, **self.search_kwargs
|
|
85
85
|
)
|
|
@@ -91,9 +91,9 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
91
91
|
results[buffer_idx] = (doc, relevance)
|
|
92
92
|
return results
|
|
93
93
|
|
|
94
|
-
async def aget_salient_docs(self, query: str) ->
|
|
94
|
+
async def aget_salient_docs(self, query: str) -> dict[int, tuple[Document, float]]:
|
|
95
95
|
"""Return documents that are salient to the query."""
|
|
96
|
-
docs_and_scores:
|
|
96
|
+
docs_and_scores: list[tuple[Document, float]]
|
|
97
97
|
docs_and_scores = (
|
|
98
98
|
await self.vectorstore.asimilarity_search_with_relevance_scores(
|
|
99
99
|
query, **self.search_kwargs
|
|
@@ -108,8 +108,8 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
108
108
|
return results
|
|
109
109
|
|
|
110
110
|
def _get_rescored_docs(
|
|
111
|
-
self, docs_and_scores:
|
|
112
|
-
) ->
|
|
111
|
+
self, docs_and_scores: dict[Any, tuple[Document, Optional[float]]]
|
|
112
|
+
) -> list[Document]:
|
|
113
113
|
current_time = datetime.datetime.now()
|
|
114
114
|
rescored_docs = [
|
|
115
115
|
(doc, self._get_combined_score(doc, relevance, current_time))
|
|
@@ -127,7 +127,7 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
127
127
|
|
|
128
128
|
def _get_relevant_documents(
|
|
129
129
|
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
|
130
|
-
) ->
|
|
130
|
+
) -> list[Document]:
|
|
131
131
|
docs_and_scores = {
|
|
132
132
|
doc.metadata["buffer_idx"]: (doc, self.default_salience)
|
|
133
133
|
for doc in self.memory_stream[-self.k :]
|
|
@@ -138,7 +138,7 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
138
138
|
|
|
139
139
|
async def _aget_relevant_documents(
|
|
140
140
|
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
|
|
141
|
-
) ->
|
|
141
|
+
) -> list[Document]:
|
|
142
142
|
docs_and_scores = {
|
|
143
143
|
doc.metadata["buffer_idx"]: (doc, self.default_salience)
|
|
144
144
|
for doc in self.memory_stream[-self.k :]
|
|
@@ -147,7 +147,7 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
147
147
|
docs_and_scores.update(await self.aget_salient_docs(query))
|
|
148
148
|
return self._get_rescored_docs(docs_and_scores)
|
|
149
149
|
|
|
150
|
-
def add_documents(self, documents:
|
|
150
|
+
def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
|
|
151
151
|
"""Add documents to vectorstore."""
|
|
152
152
|
current_time = kwargs.get("current_time")
|
|
153
153
|
if current_time is None:
|
|
@@ -164,8 +164,8 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
|
|
|
164
164
|
return self.vectorstore.add_documents(dup_docs, **kwargs)
|
|
165
165
|
|
|
166
166
|
async def aadd_documents(
|
|
167
|
-
self, documents:
|
|
168
|
-
) ->
|
|
167
|
+
self, documents: list[Document], **kwargs: Any
|
|
168
|
+
) -> list[str]:
|
|
169
169
|
"""Add documents to vectorstore."""
|
|
170
170
|
current_time = kwargs.get("current_time")
|
|
171
171
|
if current_time is None:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
from collections.abc import Mapping
|
|
1
2
|
from operator import itemgetter
|
|
2
|
-
from typing import Any, Callable,
|
|
3
|
+
from typing import Any, Callable, Optional, Union
|
|
3
4
|
|
|
4
5
|
from langchain_core.messages import BaseMessage
|
|
5
6
|
from langchain_core.output_parsers.openai_functions import JsonOutputFunctionsParser
|
|
@@ -22,7 +23,7 @@ class OpenAIFunction(TypedDict):
|
|
|
22
23
|
class OpenAIFunctionsRouter(RunnableBindingBase[BaseMessage, Any]):
|
|
23
24
|
"""A runnable that routes to the selected function."""
|
|
24
25
|
|
|
25
|
-
functions: Optional[
|
|
26
|
+
functions: Optional[list[OpenAIFunction]]
|
|
26
27
|
|
|
27
28
|
def __init__(
|
|
28
29
|
self,
|
|
@@ -33,7 +34,7 @@ class OpenAIFunctionsRouter(RunnableBindingBase[BaseMessage, Any]):
|
|
|
33
34
|
Callable[[dict], Any],
|
|
34
35
|
],
|
|
35
36
|
],
|
|
36
|
-
functions: Optional[
|
|
37
|
+
functions: Optional[list[OpenAIFunction]] = None,
|
|
37
38
|
):
|
|
38
39
|
if functions is not None:
|
|
39
40
|
assert len(functions) == len(runnables)
|