bisheng-langchain 0.0.1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- bisheng_langchain/__init__.py +0 -0
- bisheng_langchain/chains/__init__.py +5 -0
- bisheng_langchain/chains/combine_documents/__init__.py +0 -0
- bisheng_langchain/chains/combine_documents/stuff.py +56 -0
- bisheng_langchain/chains/question_answering/__init__.py +240 -0
- bisheng_langchain/chains/retrieval_qa/__init__.py +0 -0
- bisheng_langchain/chains/retrieval_qa/base.py +89 -0
- bisheng_langchain/chat_models/__init__.py +11 -0
- bisheng_langchain/chat_models/host_llm.py +409 -0
- bisheng_langchain/chat_models/interface/__init__.py +10 -0
- bisheng_langchain/chat_models/interface/minimax.py +123 -0
- bisheng_langchain/chat_models/interface/openai.py +68 -0
- bisheng_langchain/chat_models/interface/types.py +61 -0
- bisheng_langchain/chat_models/interface/utils.py +5 -0
- bisheng_langchain/chat_models/interface/wenxin.py +114 -0
- bisheng_langchain/chat_models/interface/xunfei.py +233 -0
- bisheng_langchain/chat_models/interface/zhipuai.py +81 -0
- bisheng_langchain/chat_models/minimax.py +354 -0
- bisheng_langchain/chat_models/proxy_llm.py +354 -0
- bisheng_langchain/chat_models/wenxin.py +349 -0
- bisheng_langchain/chat_models/xunfeiai.py +355 -0
- bisheng_langchain/chat_models/zhipuai.py +379 -0
- bisheng_langchain/document_loaders/__init__.py +3 -0
- bisheng_langchain/document_loaders/elem_html.py +0 -0
- bisheng_langchain/document_loaders/elem_image.py +0 -0
- bisheng_langchain/document_loaders/elem_pdf.py +655 -0
- bisheng_langchain/document_loaders/parsers/__init__.py +5 -0
- bisheng_langchain/document_loaders/parsers/image.py +28 -0
- bisheng_langchain/document_loaders/parsers/test_image.py +286 -0
- bisheng_langchain/embeddings/__init__.py +7 -0
- bisheng_langchain/embeddings/host_embedding.py +133 -0
- bisheng_langchain/embeddings/interface/__init__.py +3 -0
- bisheng_langchain/embeddings/interface/types.py +23 -0
- bisheng_langchain/embeddings/interface/wenxin.py +86 -0
- bisheng_langchain/embeddings/wenxin.py +139 -0
- bisheng_langchain/vectorstores/__init__.py +3 -0
- bisheng_langchain/vectorstores/elastic_keywords_search.py +284 -0
- bisheng_langchain-0.0.1.dist-info/METADATA +64 -0
- bisheng_langchain-0.0.1.dist-info/RECORD +41 -0
- bisheng_langchain-0.0.1.dist-info/WHEEL +5 -0
- bisheng_langchain-0.0.1.dist-info/top_level.txt +1 -0
bisheng_langchain/__init__.py: file without changes (empty file)

bisheng_langchain/chains/combine_documents/__init__.py: file without changes (empty file)
bisheng_langchain/chains/combine_documents/stuff.py
@@ -0,0 +1,56 @@

from typing import Any, List, Tuple

from langchain.callbacks.manager import Callbacks
from langchain.chains.combine_documents.stuff import StuffDocumentsChain as StuffDocumentsChainOld
from langchain.docstore.document import Document


class StuffDocumentsChain(StuffDocumentsChainOld):

    token_max: int = -1

    def combine_docs(self,
                     docs: List[Document],
                     callbacks: Callbacks = None,
                     **kwargs: Any) -> Tuple[str, dict]:
        """Stuff all documents into one prompt and pass to LLM.

        Args:
            docs: List of documents to join together into one variable
            callbacks: Optional callbacks to pass along
            **kwargs: additional parameters to use to get inputs to LLMChain.

        Returns:
            The first element returned is the single string output. The second
            element returned is a dictionary of other keys to return.
        """
        inputs = self._get_inputs(docs, **kwargs)
        # Truncate the stuffed context string when token_max is set.
        if self.token_max > 0:
            inputs[self.document_variable_name] = inputs[
                self.document_variable_name][:self.token_max]
        # Call predict on the LLM.
        return self.llm_chain.predict(callbacks=callbacks, **inputs), {}

    async def acombine_docs(self,
                            docs: List[Document],
                            callbacks: Callbacks = None,
                            **kwargs: Any) -> Tuple[str, dict]:
        """Stuff all documents into one prompt and pass to LLM.

        Args:
            docs: List of documents to join together into one variable
            callbacks: Optional callbacks to pass along
            **kwargs: additional parameters to use to get inputs to LLMChain.

        Returns:
            The first element returned is the single string output. The second
            element returned is a dictionary of other keys to return.
        """
        inputs = self._get_inputs(docs, **kwargs)
        if self.token_max > 0:
            inputs[self.document_variable_name] = inputs[
                self.document_variable_name][:self.token_max]
        # Call predict on the LLM.
        return await self.llm_chain.apredict(callbacks=callbacks, **inputs), {}
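One point worth noting about this override: despite its name, token_max caps the joined context by slicing the prompt string, so it is a character cap rather than a token count, and the default of -1 disables it. A minimal usage sketch, not part of the package; FakeListLLM and the prompt below are illustrative stand-ins:

from langchain.chains.llm import LLMChain
from langchain.docstore.document import Document
from langchain.llms.fake import FakeListLLM
from langchain.prompts import PromptTemplate

from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Stand-in model and prompt for illustration only.
llm = FakeListLLM(responses=['stub answer'])
prompt = PromptTemplate.from_template('Answer using the context:\n{context}')
chain = StuffDocumentsChain(
    llm_chain=LLMChain(llm=llm, prompt=prompt),
    document_variable_name='context',
    token_max=2000,  # truncates the joined context to 2000 characters
)
docs = [Document(page_content='first chunk'), Document(page_content='second chunk')]
answer, _ = chain.combine_docs(docs)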
bisheng_langchain/chains/question_answering/__init__.py
@@ -0,0 +1,240 @@

"""Load question answering chains."""
from typing import Any, Mapping, Optional, Protocol

# from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import Callbacks
from langchain.chains import ReduceDocumentsChain
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain
from langchain.chains.combine_documents.refine import RefineDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chains.question_answering import map_reduce_prompt, refine_prompts, stuff_prompt
from langchain.chains.question_answering.map_rerank_prompt import PROMPT as MAP_RERANK_PROMPT
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.prompt_template import BasePromptTemplate


class LoadingCallable(Protocol):
    """Interface for loading the combine documents chain."""

    def __call__(self, llm: BaseLanguageModel,
                 **kwargs: Any) -> BaseCombineDocumentsChain:
        """Callable to load the combine documents chain."""


def _load_map_rerank_chain(
    llm: BaseLanguageModel,
    prompt: BasePromptTemplate = MAP_RERANK_PROMPT,
    verbose: bool = False,
    document_variable_name: str = 'context',
    rank_key: str = 'score',
    answer_key: str = 'answer',
    callback_manager: Optional[BaseCallbackManager] = None,
    callbacks: Callbacks = None,
    **kwargs: Any,
) -> MapRerankDocumentsChain:
    llm_chain = LLMChain(
        llm=llm,
        prompt=prompt,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
    )
    return MapRerankDocumentsChain(
        llm_chain=llm_chain,
        rank_key=rank_key,
        answer_key=answer_key,
        document_variable_name=document_variable_name,
        verbose=verbose,
        callback_manager=callback_manager,
        **kwargs,
    )


def _load_stuff_chain(
    llm: BaseLanguageModel,
    prompt: Optional[BasePromptTemplate] = None,
    document_variable_name: str = 'context',
    verbose: Optional[bool] = None,
    callback_manager: Optional[BaseCallbackManager] = None,
    callbacks: Callbacks = None,
    **kwargs: Any,
) -> StuffDocumentsChain:
    _prompt = prompt or stuff_prompt.PROMPT_SELECTOR.get_prompt(llm)
    llm_chain = LLMChain(
        llm=llm,
        prompt=_prompt,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
    )
    # TODO: document prompt
    return StuffDocumentsChain(
        llm_chain=llm_chain,
        document_variable_name=document_variable_name,
        verbose=verbose,
        callback_manager=callback_manager,
        **kwargs,
    )


def _load_map_reduce_chain(
    llm: BaseLanguageModel,
    question_prompt: Optional[BasePromptTemplate] = None,
    combine_prompt: Optional[BasePromptTemplate] = None,
    combine_document_variable_name: str = 'summaries',
    map_reduce_document_variable_name: str = 'context',
    collapse_prompt: Optional[BasePromptTemplate] = None,
    reduce_llm: Optional[BaseLanguageModel] = None,
    collapse_llm: Optional[BaseLanguageModel] = None,
    verbose: Optional[bool] = None,
    callback_manager: Optional[BaseCallbackManager] = None,
    callbacks: Callbacks = None,
    token_max: int = 3000,
    **kwargs: Any,
) -> MapReduceDocumentsChain:
    _question_prompt = (
        question_prompt
        or map_reduce_prompt.QUESTION_PROMPT_SELECTOR.get_prompt(llm))
    _combine_prompt = (
        combine_prompt
        or map_reduce_prompt.COMBINE_PROMPT_SELECTOR.get_prompt(llm))
    map_chain = LLMChain(
        llm=llm,
        prompt=_question_prompt,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
    )
    _reduce_llm = reduce_llm or llm
    reduce_chain = LLMChain(
        llm=_reduce_llm,
        prompt=_combine_prompt,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
    )
    # TODO: document prompt
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain,
        document_variable_name=combine_document_variable_name,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
    )
    if collapse_prompt is None:
        collapse_chain = None
        if collapse_llm is not None:
            raise ValueError(
                'collapse_llm provided, but collapse_prompt was not: please '
                'provide one or stop providing collapse_llm.')
    else:
        _collapse_llm = collapse_llm or llm
        collapse_chain = StuffDocumentsChain(
            llm_chain=LLMChain(
                llm=_collapse_llm,
                prompt=collapse_prompt,
                verbose=verbose,
                callback_manager=callback_manager,
                callbacks=callbacks,
            ),
            document_variable_name=combine_document_variable_name,
            verbose=verbose,
            callback_manager=callback_manager,
        )
    reduce_documents_chain = ReduceDocumentsChain(
        combine_documents_chain=combine_documents_chain,
        collapse_documents_chain=collapse_chain,
        token_max=token_max,
        verbose=verbose,
    )
    return MapReduceDocumentsChain(
        llm_chain=map_chain,
        document_variable_name=map_reduce_document_variable_name,
        reduce_documents_chain=reduce_documents_chain,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
        **kwargs,
    )


def _load_refine_chain(
    llm: BaseLanguageModel,
    question_prompt: Optional[BasePromptTemplate] = None,
    refine_prompt: Optional[BasePromptTemplate] = None,
    document_variable_name: str = 'context_str',
    initial_response_name: str = 'existing_answer',
    refine_llm: Optional[BaseLanguageModel] = None,
    verbose: Optional[bool] = None,
    callback_manager: Optional[BaseCallbackManager] = None,
    callbacks: Callbacks = None,
    **kwargs: Any,
) -> RefineDocumentsChain:
    _question_prompt = (
        question_prompt
        or refine_prompts.QUESTION_PROMPT_SELECTOR.get_prompt(llm))
    _refine_prompt = refine_prompt or refine_prompts.REFINE_PROMPT_SELECTOR.get_prompt(
        llm)
    initial_chain = LLMChain(
        llm=llm,
        prompt=_question_prompt,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
    )
    _refine_llm = refine_llm or llm
    refine_chain = LLMChain(
        llm=_refine_llm,
        prompt=_refine_prompt,
        verbose=verbose,
        callback_manager=callback_manager,
        callbacks=callbacks,
    )
    return RefineDocumentsChain(
        initial_llm_chain=initial_chain,
        refine_llm_chain=refine_chain,
        document_variable_name=document_variable_name,
        initial_response_name=initial_response_name,
        verbose=verbose,
        callback_manager=callback_manager,
        **kwargs,
    )


def load_qa_chain(
    llm: BaseLanguageModel,
    chain_type: str = 'stuff',
    verbose: Optional[bool] = None,
    callback_manager: Optional[BaseCallbackManager] = None,
    **kwargs: Any,
) -> BaseCombineDocumentsChain:
    """Load question answering chain.

    Args:
        llm: Language Model to use in the chain.
        chain_type: Type of document combining chain to use. Should be one of
            "stuff", "map_reduce", "map_rerank", and "refine".
        verbose: Whether chains should be run in verbose mode or not. Note that this
            applies to all chains that make up the final chain.
        callback_manager: Callback manager to use for the chain.

    Returns:
        A chain to use for question answering.
    """
    loader_mapping: Mapping[str, LoadingCallable] = {
        'stuff': _load_stuff_chain,
        'map_reduce': _load_map_reduce_chain,
        'refine': _load_refine_chain,
        'map_rerank': _load_map_rerank_chain,
    }
    if chain_type not in loader_mapping:
        raise ValueError(f'Got unsupported chain type: {chain_type}. '
                         f'Should be one of {loader_mapping.keys()}')
    return loader_mapping[chain_type](llm,
                                      verbose=verbose,
                                      callback_manager=callback_manager,
                                      **kwargs)
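Loading through load_qa_chain works like the stock langchain helper, except that the 'stuff' loader builds the patched StuffDocumentsChain above, so extra keyword arguments such as token_max are forwarded to it. A hedged sketch, again with a fake model standing in for a real one:

from langchain.docstore.document import Document
from langchain.llms.fake import FakeListLLM

from bisheng_langchain.chains.question_answering import load_qa_chain

chain = load_qa_chain(FakeListLLM(responses=['stub answer']),
                      chain_type='stuff',
                      token_max=2000)  # passed through to StuffDocumentsChain
docs = [Document(page_content='The sky is blue.')]
print(chain.run(input_documents=docs, question='What color is the sky?'))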
bisheng_langchain/chains/retrieval_qa/__init__.py: file without changes (empty file)
bisheng_langchain/chains/retrieval_qa/base.py
@@ -0,0 +1,89 @@

from typing import List

from langchain.callbacks.manager import AsyncCallbackManagerForChainRun, CallbackManagerForChainRun
from langchain.chains.retrieval_qa.base import BaseRetrievalQA
from langchain.pydantic_v1 import Field
from langchain.schema import BaseRetriever, Document


class MultiRetrievalQA(BaseRetrievalQA):
    """Chain for question-answering against an index.

    Example:
        .. code-block:: python

            from langchain.llms import OpenAI
            from langchain.chains import RetrievalQA
            from langchain.vectorstores import FAISS
            from langchain.vectorstores.base import VectorStoreRetriever
            retriever = VectorStoreRetriever(vectorstore=FAISS(...))
            retrievalQA = RetrievalQA.from_llm(llm=OpenAI(), retriever=retriever)

    """

    vector_retriever: BaseRetriever = Field(exclude=True)
    keyword_retriever: BaseRetriever = Field(exclude=True)
    combine_strategy: str = 'keyword_front'  # "keyword_front, vector_front, mix"

    def _get_docs(
        self,
        question: str,
        *,
        run_manager: CallbackManagerForChainRun,
    ) -> List[Document]:
        """Get docs."""
        vector_docs = self.vector_retriever.get_relevant_documents(
            question, callbacks=run_manager.get_child())
        keyword_docs = self.keyword_retriever.get_relevant_documents(
            question, callbacks=run_manager.get_child())
        if self.combine_strategy == 'keyword_front':
            return keyword_docs + vector_docs
        elif self.combine_strategy == 'vector_front':
            return vector_docs + keyword_docs
        elif self.combine_strategy == 'mix':
            # Interleave the two result lists, then append whatever remains
            # of the longer one.
            combine_docs = []
            min_len = min(len(keyword_docs), len(vector_docs))
            for i in range(min_len):
                combine_docs.append(keyword_docs[i])
                combine_docs.append(vector_docs[i])
            combine_docs.extend(keyword_docs[min_len:])
            combine_docs.extend(vector_docs[min_len:])
            return combine_docs
        else:
            raise ValueError(f'Expected combine_strategy to be one of '
                             f'(keyword_front, vector_front, mix), '
                             f'instead found {self.combine_strategy}')

    async def _aget_docs(
        self,
        question: str,
        *,
        run_manager: AsyncCallbackManagerForChainRun,
    ) -> List[Document]:
        """Get docs."""
        # Use the retrievers' async API here; the released code awaited the
        # sync get_relevant_documents, which returns a plain list and cannot
        # be awaited.
        vector_docs = await self.vector_retriever.aget_relevant_documents(
            question, callbacks=run_manager.get_child())
        keyword_docs = await self.keyword_retriever.aget_relevant_documents(
            question, callbacks=run_manager.get_child())
        if self.combine_strategy == 'keyword_front':
            return keyword_docs + vector_docs
        elif self.combine_strategy == 'vector_front':
            return vector_docs + keyword_docs
        elif self.combine_strategy == 'mix':
            combine_docs = []
            min_len = min(len(keyword_docs), len(vector_docs))
            for i in range(min_len):
                combine_docs.append(keyword_docs[i])
                combine_docs.append(vector_docs[i])
            combine_docs.extend(keyword_docs[min_len:])
            combine_docs.extend(vector_docs[min_len:])
            return combine_docs
        else:
            raise ValueError(f'Expected combine_strategy to be one of '
                             f'(keyword_front, vector_front, mix), '
                             f'instead found {self.combine_strategy}')

    @property
    def _chain_type(self) -> str:
        """Return the chain type."""
        return 'multi_retrieval_qa'
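The 'mix' strategy interleaves the two result lists so neither retriever monopolizes the front of the prompt. A self-contained sketch follows; the StaticRetriever below is a toy stand-in for illustration only (in practice the keyword side would typically be backed by the elastic_keywords_search vector store listed above, and the vector side by any vector store retriever):

from typing import List

from langchain.llms.fake import FakeListLLM
from langchain.schema import BaseRetriever, Document

from bisheng_langchain.chains.question_answering import load_qa_chain
from bisheng_langchain.chains.retrieval_qa.base import MultiRetrievalQA


class StaticRetriever(BaseRetriever):
    """Toy retriever returning a fixed document list (illustration only)."""

    docs: List[Document]

    def _get_relevant_documents(self, query, *, run_manager):
        return self.docs

    async def _aget_relevant_documents(self, query, *, run_manager):
        return self.docs


qa = MultiRetrievalQA(
    combine_documents_chain=load_qa_chain(FakeListLLM(responses=['stub']),
                                          chain_type='stuff'),
    vector_retriever=StaticRetriever(docs=[Document(page_content='vector hit')]),
    keyword_retriever=StaticRetriever(docs=[Document(page_content='keyword hit')]),
    combine_strategy='mix',  # keyword[0], vector[0], keyword[1], vector[1], ...
)
print(qa.run('toy question'))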
bisheng_langchain/chat_models/__init__.py
@@ -0,0 +1,11 @@

from .host_llm import BaichuanChat, ChatGLM2Host, Llama2Chat, QwenChat
from .minimax import ChatMinimaxAI
from .proxy_llm import ProxyChatLLM
from .wenxin import ChatWenxin
from .xunfeiai import ChatXunfeiAI
from .zhipuai import ChatZhipuAI

__all__ = [
    'ProxyChatLLM', 'ChatMinimaxAI', 'ChatWenxin', 'ChatZhipuAI',
    'ChatXunfeiAI', 'Llama2Chat', 'ChatGLM2Host', 'BaichuanChat', 'QwenChat'
]
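The classes exported here wrap hosted and proxied models behind langchain's BaseChatModel interface, so invocation is uniform across providers. A hedged sketch; the constructor arguments are assumptions, since each provider defines its own credential and endpoint fields in the corresponding module:

from langchain.schema import HumanMessage

from bisheng_langchain.chat_models import ChatWenxin

# Credential fields elided and assumed; see
# bisheng_langchain/chat_models/wenxin.py for the actual field names.
chat = ChatWenxin()
print(chat([HumanMessage(content='Hello')]))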