langchain 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +1 -0
- langchain/_api/module_import.py +2 -2
- langchain/agents/__init__.py +5 -4
- langchain/agents/agent.py +272 -50
- langchain/agents/agent_iterator.py +20 -0
- langchain/agents/agent_toolkits/__init__.py +1 -0
- langchain/agents/agent_toolkits/file_management/__init__.py +1 -0
- langchain/agents/agent_toolkits/playwright/__init__.py +1 -0
- langchain/agents/agent_toolkits/vectorstore/base.py +1 -0
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +1 -0
- langchain/agents/agent_types.py +1 -0
- langchain/agents/chat/base.py +37 -1
- langchain/agents/chat/output_parser.py +14 -0
- langchain/agents/conversational/base.py +38 -6
- langchain/agents/conversational/output_parser.py +10 -0
- langchain/agents/conversational_chat/base.py +42 -3
- langchain/agents/format_scratchpad/__init__.py +1 -0
- langchain/agents/format_scratchpad/log.py +12 -1
- langchain/agents/format_scratchpad/log_to_messages.py +10 -1
- langchain/agents/format_scratchpad/openai_functions.py +10 -5
- langchain/agents/format_scratchpad/tools.py +11 -7
- langchain/agents/initialize.py +15 -7
- langchain/agents/json_chat/base.py +9 -3
- langchain/agents/loading.py +7 -0
- langchain/agents/mrkl/base.py +39 -10
- langchain/agents/mrkl/output_parser.py +12 -0
- langchain/agents/openai_assistant/base.py +37 -14
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +32 -4
- langchain/agents/openai_functions_agent/base.py +61 -10
- langchain/agents/openai_functions_multi_agent/base.py +22 -7
- langchain/agents/openai_tools/base.py +3 -0
- langchain/agents/output_parsers/__init__.py +1 -0
- langchain/agents/react/base.py +1 -0
- langchain/agents/self_ask_with_search/base.py +1 -0
- langchain/agents/structured_chat/output_parser.py +3 -3
- langchain/agents/tool_calling_agent/base.py +13 -3
- langchain/agents/tools.py +3 -0
- langchain/agents/utils.py +9 -1
- langchain/base_language.py +1 -0
- langchain/callbacks/__init__.py +1 -0
- langchain/callbacks/base.py +1 -0
- langchain/callbacks/streaming_stdout.py +1 -0
- langchain/callbacks/streaming_stdout_final_only.py +1 -0
- langchain/callbacks/tracers/evaluation.py +1 -0
- langchain/chains/api/base.py +5 -2
- langchain/chains/base.py +1 -1
- langchain/chains/combine_documents/base.py +59 -0
- langchain/chains/combine_documents/map_reduce.py +4 -2
- langchain/chains/combine_documents/map_rerank.py +5 -3
- langchain/chains/combine_documents/refine.py +4 -2
- langchain/chains/combine_documents/stuff.py +1 -0
- langchain/chains/constitutional_ai/base.py +1 -0
- langchain/chains/constitutional_ai/models.py +1 -0
- langchain/chains/constitutional_ai/principles.py +1 -0
- langchain/chains/conversation/base.py +81 -1
- langchain/chains/conversational_retrieval/base.py +2 -1
- langchain/chains/elasticsearch_database/base.py +2 -1
- langchain/chains/hyde/base.py +1 -0
- langchain/chains/llm.py +4 -2
- langchain/chains/llm_checker/base.py +4 -3
- langchain/chains/llm_math/base.py +1 -0
- langchain/chains/loading.py +2 -1
- langchain/chains/mapreduce.py +1 -0
- langchain/chains/moderation.py +1 -1
- langchain/chains/natbot/base.py +1 -0
- langchain/chains/openai_functions/base.py +1 -0
- langchain/chains/openai_functions/extraction.py +6 -6
- langchain/chains/openai_tools/extraction.py +3 -3
- langchain/chains/qa_generation/base.py +47 -1
- langchain/chains/qa_with_sources/__init__.py +1 -0
- langchain/chains/qa_with_sources/loading.py +1 -0
- langchain/chains/qa_with_sources/vector_db.py +1 -1
- langchain/chains/query_constructor/base.py +1 -0
- langchain/chains/query_constructor/ir.py +1 -0
- langchain/chains/question_answering/chain.py +1 -0
- langchain/chains/retrieval_qa/base.py +3 -2
- langchain/chains/router/base.py +1 -0
- langchain/chains/router/llm_router.py +2 -1
- langchain/chains/router/multi_prompt.py +1 -0
- langchain/chains/router/multi_retrieval_qa.py +1 -0
- langchain/chains/sequential.py +2 -1
- langchain/chains/structured_output/base.py +12 -12
- langchain/chains/summarize/chain.py +1 -0
- langchain/chains/transform.py +4 -3
- langchain/chat_models/__init__.py +1 -0
- langchain/chat_models/base.py +2 -2
- langchain/docstore/__init__.py +1 -0
- langchain/document_loaders/__init__.py +1 -0
- langchain/document_transformers/__init__.py +1 -0
- langchain/embeddings/__init__.py +0 -1
- langchain/evaluation/__init__.py +2 -1
- langchain/evaluation/agents/__init__.py +1 -0
- langchain/evaluation/agents/trajectory_eval_prompt.py +1 -0
- langchain/evaluation/comparison/__init__.py +1 -0
- langchain/evaluation/comparison/eval_chain.py +1 -0
- langchain/evaluation/comparison/prompt.py +1 -0
- langchain/evaluation/embedding_distance/__init__.py +1 -0
- langchain/evaluation/embedding_distance/base.py +1 -0
- langchain/evaluation/loading.py +1 -0
- langchain/evaluation/parsing/base.py +1 -0
- langchain/evaluation/qa/__init__.py +1 -0
- langchain/evaluation/qa/eval_chain.py +1 -0
- langchain/evaluation/qa/generate_chain.py +1 -0
- langchain/evaluation/schema.py +1 -0
- langchain/evaluation/scoring/__init__.py +1 -0
- langchain/evaluation/scoring/eval_chain.py +1 -0
- langchain/evaluation/scoring/prompt.py +1 -0
- langchain/evaluation/string_distance/__init__.py +1 -0
- langchain/example_generator.py +1 -0
- langchain/formatting.py +1 -0
- langchain/globals/__init__.py +1 -0
- langchain/graphs/__init__.py +1 -0
- langchain/indexes/__init__.py +1 -0
- langchain/indexes/_sql_record_manager.py +9 -5
- langchain/indexes/graph.py +1 -0
- langchain/indexes/prompts/__init__.py +1 -0
- langchain/input.py +1 -0
- langchain/llms/__init__.py +1 -0
- langchain/load/__init__.py +1 -0
- langchain/memory/__init__.py +5 -0
- langchain/memory/vectorstore_token_buffer_memory.py +184 -0
- langchain/output_parsers/__init__.py +1 -0
- langchain/output_parsers/combining.py +1 -1
- langchain/output_parsers/enum.py +7 -3
- langchain/output_parsers/fix.py +57 -16
- langchain/output_parsers/pandas_dataframe.py +1 -1
- langchain/output_parsers/regex.py +1 -1
- langchain/output_parsers/regex_dict.py +1 -1
- langchain/output_parsers/retry.py +76 -29
- langchain/output_parsers/structured.py +3 -3
- langchain/output_parsers/yaml.py +4 -0
- langchain/prompts/__init__.py +1 -0
- langchain/prompts/example_selector/__init__.py +1 -0
- langchain/python.py +1 -0
- langchain/requests.py +1 -0
- langchain/retrievers/__init__.py +1 -0
- langchain/retrievers/document_compressors/chain_extract.py +1 -0
- langchain/retrievers/document_compressors/chain_filter.py +1 -0
- langchain/retrievers/ensemble.py +18 -3
- langchain/retrievers/multi_query.py +2 -1
- langchain/retrievers/re_phraser.py +2 -1
- langchain/retrievers/self_query/base.py +9 -8
- langchain/schema/__init__.py +1 -0
- langchain/schema/runnable/__init__.py +1 -0
- langchain/serpapi.py +1 -0
- langchain/smith/__init__.py +6 -5
- langchain/smith/evaluation/__init__.py +0 -1
- langchain/smith/evaluation/string_run_evaluator.py +1 -0
- langchain/sql_database.py +1 -0
- langchain/storage/__init__.py +1 -0
- langchain/storage/_lc_store.py +1 -0
- langchain/storage/in_memory.py +1 -0
- langchain/text_splitter.py +1 -0
- langchain/tools/__init__.py +1 -0
- langchain/tools/amadeus/__init__.py +1 -0
- langchain/tools/azure_cognitive_services/__init__.py +1 -0
- langchain/tools/bing_search/__init__.py +1 -0
- langchain/tools/dataforseo_api_search/__init__.py +1 -0
- langchain/tools/ddg_search/__init__.py +1 -0
- langchain/tools/edenai/__init__.py +1 -0
- langchain/tools/eleven_labs/__init__.py +1 -0
- langchain/tools/file_management/__init__.py +1 -0
- langchain/tools/github/__init__.py +1 -1
- langchain/tools/gitlab/__init__.py +1 -1
- langchain/tools/gmail/__init__.py +1 -0
- langchain/tools/golden_query/__init__.py +1 -0
- langchain/tools/google_cloud/__init__.py +1 -0
- langchain/tools/google_finance/__init__.py +1 -0
- langchain/tools/google_jobs/__init__.py +1 -0
- langchain/tools/google_lens/__init__.py +1 -0
- langchain/tools/google_places/__init__.py +1 -0
- langchain/tools/google_scholar/__init__.py +1 -0
- langchain/tools/google_search/__init__.py +1 -0
- langchain/tools/google_trends/__init__.py +1 -0
- langchain/tools/human/__init__.py +1 -0
- langchain/tools/memorize/__init__.py +1 -0
- langchain/tools/metaphor_search/__init__.py +1 -0
- langchain/tools/multion/__init__.py +1 -0
- langchain/tools/office365/__init__.py +1 -0
- langchain/tools/openapi/utils/openapi_utils.py +1 -0
- langchain/tools/openweathermap/__init__.py +1 -0
- langchain/tools/playwright/__init__.py +1 -0
- langchain/tools/shell/__init__.py +1 -0
- langchain/tools/slack/__init__.py +1 -0
- langchain/tools/sql_database/prompt.py +1 -0
- langchain/tools/steamship_image_generation/__init__.py +1 -0
- langchain/tools/tavily_search/__init__.py +1 -0
- langchain/tools/wolfram_alpha/__init__.py +1 -0
- langchain/tools/zapier/__init__.py +1 -0
- langchain/utilities/__init__.py +1 -0
- langchain/utilities/python.py +1 -0
- langchain/vectorstores/__init__.py +1 -0
- {langchain-0.2.5.dist-info → langchain-0.2.7.dist-info}/METADATA +3 -4
- {langchain-0.2.5.dist-info → langchain-0.2.7.dist-info}/RECORD +197 -196
- {langchain-0.2.5.dist-info → langchain-0.2.7.dist-info}/LICENSE +0 -0
- {langchain-0.2.5.dist-info → langchain-0.2.7.dist-info}/WHEEL +0 -0
- {langchain-0.2.5.dist-info → langchain-0.2.7.dist-info}/entry_points.txt +0 -0
langchain/memory/vectorstore_token_buffer_memory.py ADDED

```diff
@@ -0,0 +1,184 @@
+"""
+Class for a conversation memory buffer with older messages stored in a vectorstore.
+
+This implements a conversation memory in which the messages are stored in a memory
+buffer up to a specified token limit. When the limit is exceeded, older messages are
+saved to a vectorstore backing database. The vectorstore can be made persistent across
+sessions.
+"""
+
+import warnings
+from datetime import datetime
+from typing import Any, Dict, List
+
+from langchain_core.messages import BaseMessage
+from langchain_core.prompts.chat import SystemMessagePromptTemplate
+from langchain_core.pydantic_v1 import Field, PrivateAttr
+from langchain_core.vectorstores import VectorStoreRetriever
+
+from langchain.memory import ConversationTokenBufferMemory, VectorStoreRetrieverMemory
+from langchain.memory.chat_memory import BaseChatMemory
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+DEFAULT_HISTORY_TEMPLATE = """
+Current date and time: {current_time}.
+
+Potentially relevant timestamped excerpts of previous conversations (you
+do not need to use these if irrelevant):
+{previous_history}
+
+"""
+
+TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %Z"
+
+
+class ConversationVectorStoreTokenBufferMemory(ConversationTokenBufferMemory):
+    """Conversation chat memory with token limit and vectordb backing.
+
+    load_memory_variables() will return a dict with the key "history".
+    It contains background information retrieved from the vector store
+    plus recent lines of the current conversation.
+
+    To help the LLM understand the part of the conversation stored in the
+    vectorstore, each interaction is timestamped and the current date and
+    time is also provided in the history. A side effect of this is that the
+    LLM will have access to the current date and time.
+
+    Initialization arguments:
+
+    This class accepts all the initialization arguments of
+    ConversationTokenBufferMemory, such as `llm`. In addition, it
+    accepts the following additional arguments
+
+        retriever: (required) A VectorStoreRetriever object to use
+            as the vector backing store
+
+        split_chunk_size: (optional, 1000) Token chunk split size
+            for long messages generated by the AI
+
+        previous_history_template: (optional) Template used to format
+            the contents of the prompt history
+
+
+    Example using ChromaDB:
+
+    .. code-block:: python
+
+        from langchain.memory.token_buffer_vectorstore_memory import (
+            ConversationVectorStoreTokenBufferMemory
+        )
+        from langchain_community.vectorstores import Chroma
+        from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+        from langchain_openai import OpenAI
+
+        embedder = HuggingFaceInstructEmbeddings(
+            query_instruction="Represent the query for retrieval: "
+        )
+        chroma = Chroma(collection_name="demo",
+                        embedding_function=embedder,
+                        collection_metadata={"hnsw:space": "cosine"},
+                        )
+
+        retriever = chroma.as_retriever(
+            search_type="similarity_score_threshold",
+            search_kwargs={
+                'k': 5,
+                'score_threshold': 0.75,
+            },
+        )
+
+        conversation_memory = ConversationVectorStoreTokenBufferMemory(
+            return_messages=True,
+            llm=OpenAI(),
+            retriever=retriever,
+            max_token_limit = 1000,
+        )
+
+        conversation_memory.save_context({"Human": "Hi there"},
+                                         {"AI": "Nice to meet you!"}
+                                         )
+        conversation_memory.save_context({"Human": "Nice day isn't it?"},
+                                         {"AI": "I love Wednesdays."}
+                                         )
+        conversation_memory.load_memory_variables({"input": "What time is it?"})
+
+    """
+
+    retriever: VectorStoreRetriever = Field(exclude=True)
+    memory_key: str = "history"
+    previous_history_template: str = DEFAULT_HISTORY_TEMPLATE
+    split_chunk_size: int = 1000
+
+    _memory_retriever: VectorStoreRetrieverMemory = PrivateAttr(default=None)
+    _timestamps: List[datetime] = PrivateAttr(default_factory=list)
+
+    @property
+    def memory_retriever(self) -> VectorStoreRetrieverMemory:
+        """Return a memory retriever from the passed retriever object."""
+        if self._memory_retriever is not None:
+            return self._memory_retriever
+        self._memory_retriever = VectorStoreRetrieverMemory(retriever=self.retriever)
+        return self._memory_retriever
+
+    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        """Return history and memory buffer."""
+        try:
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                memory_variables = self.memory_retriever.load_memory_variables(inputs)
+            previous_history = memory_variables[self.memory_retriever.memory_key]
+        except AssertionError:  # happens when db is empty
+            previous_history = ""
+        current_history = super().load_memory_variables(inputs)
+        template = SystemMessagePromptTemplate.from_template(
+            self.previous_history_template
+        )
+        messages = [
+            template.format(
+                previous_history=previous_history,
+                current_time=datetime.now().astimezone().strftime(TIMESTAMP_FORMAT),
+            )
+        ]
+        messages.extend(current_history[self.memory_key])
+        return {self.memory_key: messages}
+
+    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
+        """Save context from this conversation to buffer. Pruned."""
+        BaseChatMemory.save_context(self, inputs, outputs)
+        self._timestamps.append(datetime.now().astimezone())
+        # Prune buffer if it exceeds max token limit
+        buffer = self.chat_memory.messages
+        curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)
+        if curr_buffer_length > self.max_token_limit:
+            while curr_buffer_length > self.max_token_limit:
+                self._pop_and_store_interaction(buffer)
+                curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)
+
+    def save_remainder(self) -> None:
+        """
+        Save the remainder of the conversation buffer to the vector store.
+
+        This is useful if you have made the vectorstore persistent, in which
+        case this can be called before the end of the session to store the
+        remainder of the conversation.
+        """
+        buffer = self.chat_memory.messages
+        while len(buffer) > 0:
+            self._pop_and_store_interaction(buffer)
+
+    def _pop_and_store_interaction(self, buffer: List[BaseMessage]) -> None:
+        input = buffer.pop(0)
+        output = buffer.pop(0)
+        timestamp = self._timestamps.pop(0).strftime(TIMESTAMP_FORMAT)
+        # Split AI output into smaller chunks to avoid creating documents
+        # that will overflow the context window
+        ai_chunks = self._split_long_ai_text(str(output.content))
+        for index, chunk in enumerate(ai_chunks):
+            self.memory_retriever.save_context(
+                {"Human": f"<{timestamp}/00> {str(input.content)}"},
+                {"AI": f"<{timestamp}/{index:02}> {chunk}"},
+            )
+
+    def _split_long_ai_text(self, text: str) -> List[str]:
+        splitter = RecursiveCharacterTextSplitter(chunk_size=self.split_chunk_size)
+        return [chunk.page_content for chunk in splitter.create_documents([text])]
```
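The new class keeps the live conversation in a token-limited buffer and overflows older, timestamped turns into the retriever's vectorstore. For orientation, a minimal sketch of the session-persistence workflow that `save_remainder()` enables; the FAISS store, `FakeEmbeddings`, and `FakeListLLM` below are illustrative stand-ins, not part of this diff:

```python
# Sketch only: FAISS, FakeEmbeddings, and FakeListLLM are stand-ins; any
# vectorstore retriever and token-counting LLM work the same way.
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.llms.fake import FakeListLLM
from langchain_community.vectorstores import FAISS

from langchain.memory.vectorstore_token_buffer_memory import (
    ConversationVectorStoreTokenBufferMemory,
)

store = FAISS.from_texts(["seed"], FakeEmbeddings(size=32))
memory = ConversationVectorStoreTokenBufferMemory(
    llm=FakeListLLM(responses=["ok"]),  # only used here to count buffer tokens
    retriever=store.as_retriever(search_kwargs={"k": 4}),
    max_token_limit=2000,
)

memory.save_context({"Human": "Hi there"}, {"AI": "Nice to meet you!"})
# ...conversation continues; once the buffer exceeds max_token_limit,
# save_context() moves the oldest interactions into the vectorstore.

# With a persistent store, flush the rest of the buffer at session end:
memory.save_remainder()
```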
langchain/output_parsers/combining.py CHANGED

```diff
@@ -6,7 +6,7 @@ from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.pydantic_v1 import root_validator


-class CombiningOutputParser(BaseOutputParser):
+class CombiningOutputParser(BaseOutputParser[Dict[str, Any]]):
     """Combine multiple output parsers into one."""

     parsers: List[BaseOutputParser]
```

langchain/output_parsers/enum.py CHANGED
```diff
@@ -1,12 +1,12 @@
 from enum import Enum
-from typing import Any, Dict, List, Type
+from typing import Dict, List, Type

 from langchain_core.exceptions import OutputParserException
 from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.pydantic_v1 import root_validator


-class EnumOutputParser(BaseOutputParser):
+class EnumOutputParser(BaseOutputParser[Enum]):
     """Parse an output that is one of a set of values."""

     enum: Type[Enum]
@@ -23,7 +23,7 @@ class EnumOutputParser(BaseOutputParser):
     def _valid_values(self) -> List[str]:
         return [e.value for e in self.enum]

-    def parse(self, response: str) -> Any:
+    def parse(self, response: str) -> Enum:
         try:
             return self.enum(response.strip())
         except ValueError:
@@ -34,3 +34,7 @@ class EnumOutputParser(BaseOutputParser):

     def get_format_instructions(self) -> str:
         return f"Select one of the following options: {', '.join(self._valid_values)}"
+
+    @property
+    def OutputType(self) -> Type[Enum]:
+        return self.enum
```
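With `EnumOutputParser` now parameterized as `BaseOutputParser[Enum]`, `parse()` is typed to return a member of the configured enum, and the new `OutputType` property exposes that enum class. A small self-contained sketch:

```python
from enum import Enum

from langchain.output_parsers import EnumOutputParser


class Color(Enum):
    RED = "red"
    GREEN = "green"


parser = EnumOutputParser(enum=Color)
assert parser.parse("red") is Color.RED  # parse() now returns an Enum member
assert parser.OutputType is Color        # new in 0.2.7
```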
langchain/output_parsers/fix.py CHANGED
```diff
@@ -1,11 +1,12 @@
 from __future__ import annotations

-from typing import Any, TypeVar
+from typing import Any, TypeVar, Union

 from langchain_core.exceptions import OutputParserException
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.prompts import BasePromptTemplate
+from langchain_core.runnables import RunnableSerializable

 from langchain.output_parsers.prompts import NAIVE_FIX_PROMPT

@@ -22,10 +23,12 @@ class OutputFixingParser(BaseOutputParser[T]):
     parser: BaseOutputParser[T]
     """The parser to use to parse the output."""
     # Should be an LLMChain but we want to avoid top-level imports from langchain.chains
-    retry_chain: Any
-    """The LLMChain to use to retry the completion."""
+    retry_chain: Union[RunnableSerializable, Any]
+    """The RunnableSerializable to use to retry the completion (Legacy: LLMChain)."""
     max_retries: int = 1
     """The maximum number of times to retry the parse."""
+    legacy: bool = True
+    """Whether to use the run or arun method of the retry_chain."""

     @classmethod
     def from_llm(
@@ -46,9 +49,7 @@ class OutputFixingParser(BaseOutputParser[T]):
         Returns:
             OutputFixingParser
         """
-        from langchain.chains.llm import LLMChain
-
-        chain = LLMChain(llm=llm, prompt=prompt)
+        chain = prompt | llm
         return cls(parser=parser, retry_chain=chain, max_retries=max_retries)

     def parse(self, completion: str) -> T:
@@ -62,11 +63,29 @@ class OutputFixingParser(BaseOutputParser[T]):
                     raise e
                 else:
                     retries += 1
-                    completion = self.retry_chain.run(
-                        instructions=self.parser.get_format_instructions(),
-                        completion=completion,
-                        error=repr(e),
-                    )
+                    if self.legacy and hasattr(self.retry_chain, "run"):
+                        completion = self.retry_chain.run(
+                            instructions=self.parser.get_format_instructions(),
+                            completion=completion,
+                            error=repr(e),
+                        )
+                    else:
+                        try:
+                            completion = self.retry_chain.invoke(
+                                dict(
+                                    instructions=self.parser.get_format_instructions(),  # noqa: E501
+                                    input=completion,
+                                    error=repr(e),
+                                )
+                            )
+                        except (NotImplementedError, AttributeError):
+                            # Case: self.parser does not have get_format_instructions  # noqa: E501
+                            completion = self.retry_chain.invoke(
+                                dict(
+                                    input=completion,
+                                    error=repr(e),
+                                )
+                            )

         raise OutputParserException("Failed to parse")

@@ -81,11 +100,29 @@ class OutputFixingParser(BaseOutputParser[T]):
                     raise e
                 else:
                     retries += 1
-                    completion = await self.retry_chain.arun(
-                        instructions=self.parser.get_format_instructions(),
-                        completion=completion,
-                        error=repr(e),
-                    )
+                    if self.legacy and hasattr(self.retry_chain, "arun"):
+                        completion = await self.retry_chain.arun(
+                            instructions=self.parser.get_format_instructions(),  # noqa: E501
+                            completion=completion,
+                            error=repr(e),
+                        )
+                    else:
+                        try:
+                            completion = await self.retry_chain.ainvoke(
+                                dict(
+                                    instructions=self.parser.get_format_instructions(),  # noqa: E501
+                                    input=completion,
+                                    error=repr(e),
+                                )
+                            )
+                        except (NotImplementedError, AttributeError):
+                            # Case: self.parser does not have get_format_instructions  # noqa: E501
+                            completion = await self.retry_chain.ainvoke(
+                                dict(
+                                    input=completion,
+                                    error=repr(e),
+                                )
+                            )

         raise OutputParserException("Failed to parse")

@@ -95,3 +132,7 @@ class OutputFixingParser(BaseOutputParser[T]):
     @property
     def _type(self) -> str:
         return "output_fixing"
+
+    @property
+    def OutputType(self) -> type[T]:
+        return self.parser.OutputType
```
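The net effect: `from_llm` now builds `retry_chain` as a runnable (`prompt | llm`) instead of an `LLMChain`. Although `legacy` defaults to `True`, runnables have no `run`/`arun` method, so the `hasattr` guard routes them to `.invoke()`/`.ainvoke()`; only a caller-supplied `LLMChain` still takes the legacy path. A hedged usage sketch (the `ChatOpenAI` model is an assumption, any chat model should do):

```python
# Assumptions: langchain-openai is installed and OPENAI_API_KEY is set;
# any chat model can stand in for ChatOpenAI.
from langchain_core.pydantic_v1 import BaseModel
from langchain_openai import ChatOpenAI

from langchain.output_parsers import OutputFixingParser, PydanticOutputParser


class Person(BaseModel):
    name: str
    age: int


base = PydanticOutputParser(pydantic_object=Person)
# retry_chain is now NAIVE_FIX_PROMPT | llm (a runnable), so a failed parse
# re-asks the model via .invoke() rather than LLMChain.run().
fixing = OutputFixingParser.from_llm(parser=base, llm=ChatOpenAI(), max_retries=2)

fixing.parse('{"name": "Ada", "age": "not-a-number"}')  # exercises the fix path
```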
langchain/output_parsers/pandas_dataframe.py CHANGED

```diff
@@ -10,7 +10,7 @@ from langchain.output_parsers.format_instructions import (
 )


-class PandasDataFrameOutputParser(BaseOutputParser):
+class PandasDataFrameOutputParser(BaseOutputParser[Dict[str, Any]]):
     """Parse an output using Pandas DataFrame format."""

     """The Pandas DataFrame to parse."""
```
langchain/output_parsers/regex.py CHANGED

```diff
@@ -6,7 +6,7 @@ from typing import Dict, List, Optional
 from langchain_core.output_parsers import BaseOutputParser


-class RegexParser(BaseOutputParser):
+class RegexParser(BaseOutputParser[Dict[str, str]]):
     """Parse the output of an LLM call using a regex."""

     @classmethod
```
langchain/output_parsers/regex_dict.py CHANGED

```diff
@@ -6,7 +6,7 @@ from typing import Dict, Optional
 from langchain_core.output_parsers import BaseOutputParser


-class RegexDictParser(BaseOutputParser):
+class RegexDictParser(BaseOutputParser[Dict[str, str]]):
     """Parse the output of an LLM call into a Dictionary using a regex."""

     regex_pattern: str = r"{}:\s?([^.'\n']*)\.?"  # : :meta private:
```
langchain/output_parsers/retry.py CHANGED

```diff
@@ -1,12 +1,13 @@
 from __future__ import annotations

-from typing import Any, TypeVar
+from typing import Any, TypeVar, Union

 from langchain_core.exceptions import OutputParserException
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.prompt_values import PromptValue
 from langchain_core.prompts import BasePromptTemplate, PromptTemplate
+from langchain_core.runnables import RunnableSerializable

 NAIVE_COMPLETION_RETRY = """Prompt:
 {prompt}
@@ -43,10 +44,12 @@ class RetryOutputParser(BaseOutputParser[T]):
     parser: BaseOutputParser[T]
     """The parser to use to parse the output."""
     # Should be an LLMChain but we want to avoid top-level imports from langchain.chains
-    retry_chain: Any
-    """The LLMChain to use to retry the completion."""
+    retry_chain: Union[RunnableSerializable, Any]
+    """The RunnableSerializable to use to retry the completion (Legacy: LLMChain)."""
     max_retries: int = 1
     """The maximum number of times to retry the parse."""
+    legacy: bool = True
+    """Whether to use the run or arun method of the retry_chain."""

     @classmethod
     def from_llm(
@@ -67,9 +70,7 @@ class RetryOutputParser(BaseOutputParser[T]):
         Returns:
             RetryOutputParser
         """
-        from langchain.chains.llm import LLMChain
-
-        chain = LLMChain(llm=llm, prompt=prompt)
+        chain = prompt | llm
         return cls(parser=parser, retry_chain=chain, max_retries=max_retries)

     def parse_with_prompt(self, completion: str, prompt_value: PromptValue) -> T:
@@ -92,9 +93,19 @@ class RetryOutputParser(BaseOutputParser[T]):
                     raise e
                 else:
                     retries += 1
-                    completion = self.retry_chain.run(
-                        prompt=prompt_value.to_string(), completion=completion
-                    )
+                    if self.legacy and hasattr(self.retry_chain, "run"):
+                        completion = self.retry_chain.run(
+                            prompt=prompt_value.to_string(),
+                            completion=completion,
+                            error=repr(e),
+                        )
+                    else:
+                        completion = self.retry_chain.invoke(
+                            dict(
+                                prompt=prompt_value.to_string(),
+                                input=completion,
+                            )
+                        )

         raise OutputParserException("Failed to parse")

@@ -118,9 +129,19 @@ class RetryOutputParser(BaseOutputParser[T]):
                     raise e
                 else:
                     retries += 1
-                    completion = await self.retry_chain.arun(
-                        prompt=prompt_value.to_string(), completion=completion
-                    )
+                    if self.legacy and hasattr(self.retry_chain, "arun"):
+                        completion = await self.retry_chain.arun(
+                            prompt=prompt_value.to_string(),
+                            completion=completion,
+                            error=repr(e),
+                        )
+                    else:
+                        completion = await self.retry_chain.ainvoke(
+                            dict(
+                                prompt=prompt_value.to_string(),
+                                input=completion,
+                            )
+                        )

         raise OutputParserException("Failed to parse")

@@ -136,6 +157,10 @@ class RetryOutputParser(BaseOutputParser[T]):
     def _type(self) -> str:
         return "retry"

+    @property
+    def OutputType(self) -> type[T]:
+        return self.parser.OutputType
+

 class RetryWithErrorOutputParser(BaseOutputParser[T]):
     """Wrap a parser and try to fix parsing errors.
@@ -149,11 +174,13 @@ class RetryWithErrorOutputParser(BaseOutputParser[T]):

     parser: BaseOutputParser[T]
     """The parser to use to parse the output."""
-    # Should be an LLMChain but we want to avoid top-level imports from langchain.chains
-    retry_chain: Any
-    """The LLMChain to use to retry the completion."""
+    # Should be an LLMChain but we want to avoid top-level imports from langchain.chains  # noqa: E501
+    retry_chain: Union[RunnableSerializable, Any]
+    """The RunnableSerializable to use to retry the completion (Legacy: LLMChain)."""
     max_retries: int = 1
     """The maximum number of times to retry the parse."""
+    legacy: bool = True
+    """Whether to use the run or arun method of the retry_chain."""

     @classmethod
     def from_llm(
@@ -174,12 +201,10 @@ class RetryWithErrorOutputParser(BaseOutputParser[T]):
         Returns:
             A RetryWithErrorOutputParser.
         """
-        from langchain.chains.llm import LLMChain
-
-        chain = LLMChain(llm=llm, prompt=prompt)
+        chain = prompt | llm
         return cls(parser=parser, retry_chain=chain, max_retries=max_retries)

-    def parse_with_prompt(self, completion: str, prompt_value: PromptValue) -> T:
+    def parse_with_prompt(self, completion: str, prompt_value: PromptValue) -> T:  # noqa: E501
         retries = 0

         while retries <= self.max_retries:
@@ -190,11 +215,20 @@ class RetryWithErrorOutputParser(BaseOutputParser[T]):
                     raise e
                 else:
                     retries += 1
-                    completion = self.retry_chain.run(
-                        prompt=prompt_value.to_string(),
-                        completion=completion,
-                        error=repr(e),
-                    )
+                    if self.legacy and hasattr(self.retry_chain, "run"):
+                        completion = self.retry_chain.run(
+                            prompt=prompt_value.to_string(),
+                            completion=completion,
+                            error=repr(e),
+                        )
+                    else:
+                        completion = self.retry_chain.invoke(
+                            dict(
+                                input=completion,
+                                prompt=prompt_value.to_string(),
+                                error=repr(e),
+                            )
+                        )

         raise OutputParserException("Failed to parse")

@@ -209,11 +243,20 @@ class RetryWithErrorOutputParser(BaseOutputParser[T]):
                     raise e
                 else:
                     retries += 1
-                    completion = await self.retry_chain.arun(
-                        prompt=prompt_value.to_string(),
-                        completion=completion,
-                        error=repr(e),
-                    )
+                    if self.legacy and hasattr(self.retry_chain, "arun"):
+                        completion = await self.retry_chain.arun(
+                            prompt=prompt_value.to_string(),
+                            completion=completion,
+                            error=repr(e),
+                        )
+                    else:
+                        completion = await self.retry_chain.ainvoke(
+                            dict(
+                                prompt=prompt_value.to_string(),
+                                input=completion,
+                                error=repr(e),
+                            )
+                        )

         raise OutputParserException("Failed to parse")

@@ -228,3 +271,7 @@ class RetryWithErrorOutputParser(BaseOutputParser[T]):
     @property
     def _type(self) -> str:
         return "retry_with_error"
+
+    @property
+    def OutputType(self) -> type[T]:
+        return self.parser.OutputType
```
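`RetryOutputParser` and `RetryWithErrorOutputParser` get the same treatment, with the difference that they re-send the original prompt, so callers go through `parse_with_prompt`. A sketch under the same assumptions as above (`ChatOpenAI` is a stand-in for any chat model):

```python
# DatetimeOutputParser raises OutputParserException on bad input,
# which is what triggers the retry chain.
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

from langchain.output_parsers import DatetimeOutputParser, RetryOutputParser

base = DatetimeOutputParser()
retry = RetryOutputParser.from_llm(parser=base, llm=ChatOpenAI(), max_retries=1)

prompt = PromptTemplate.from_template(
    "When did {event} happen?\n{format_instructions}"
)
prompt_value = prompt.format_prompt(
    event="the moon landing", format_instructions=base.get_format_instructions()
)
# "tomorrow" fails datetime parsing, so the original prompt plus the bad
# completion are sent back through retry_chain (dict(prompt=..., input=...)).
retry.parse_with_prompt("tomorrow", prompt_value)
```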
langchain/output_parsers/structured.py CHANGED

```diff
@@ -1,6 +1,6 @@
 from __future__ import annotations

-from typing import Any, List
+from typing import Any, Dict, List

 from langchain_core.output_parsers import BaseOutputParser
 from langchain_core.output_parsers.json import parse_and_check_json_markdown
@@ -31,7 +31,7 @@ def _get_sub_string(schema: ResponseSchema) -> str:
     )


-class StructuredOutputParser(BaseOutputParser):
+class StructuredOutputParser(BaseOutputParser[Dict[str, Any]]):
     """Parse the output of an LLM call to a structured output."""

     response_schemas: List[ResponseSchema]
@@ -92,7 +92,7 @@ class StructuredOutputParser(BaseOutputParser):
         else:
             return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str)

-    def parse(self, text: str) -> Any:
+    def parse(self, text: str) -> Dict[str, Any]:
         expected_keys = [rs.name for rs in self.response_schemas]
         return parse_and_check_json_markdown(text, expected_keys)

```
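The structured-parser change is purely a typing tightening: `parse()` is now declared to return `Dict[str, Any]`. For reference, a minimal round-trip (the model output is expected as a fenced ```json block):

```python
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

parser = StructuredOutputParser.from_response_schemas(
    [
        ResponseSchema(name="answer", description="the answer"),
        ResponseSchema(name="source", description="a source URL"),
    ]
)
reply = '```json\n{"answer": "42", "source": "https://example.com"}\n```'
result = parser.parse(reply)  # typed as Dict[str, Any] in 0.2.7
assert result == {"answer": "42", "source": "https://example.com"}
```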
langchain/output_parsers/yaml.py CHANGED
langchain/prompts/__init__.py CHANGED
langchain/python.py CHANGED
langchain/requests.py CHANGED
langchain/retrievers/__init__.py CHANGED
```diff
@@ -17,6 +17,7 @@ the backbone of a retriever, but there are other types of retrievers as well.
     Document, Serializable, Callbacks,
     CallbackManagerForRetrieverRun, AsyncCallbackManagerForRetrieverRun
 """
+
 from typing import TYPE_CHECKING, Any

 from langchain._api.module_import import create_importer
```
|