bisheng-langchain 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. bisheng_langchain/__init__.py +0 -0
  2. bisheng_langchain/chains/__init__.py +5 -0
  3. bisheng_langchain/chains/combine_documents/__init__.py +0 -0
  4. bisheng_langchain/chains/combine_documents/stuff.py +56 -0
  5. bisheng_langchain/chains/question_answering/__init__.py +240 -0
  6. bisheng_langchain/chains/retrieval_qa/__init__.py +0 -0
  7. bisheng_langchain/chains/retrieval_qa/base.py +89 -0
  8. bisheng_langchain/chat_models/__init__.py +11 -0
  9. bisheng_langchain/chat_models/host_llm.py +409 -0
  10. bisheng_langchain/chat_models/interface/__init__.py +10 -0
  11. bisheng_langchain/chat_models/interface/minimax.py +123 -0
  12. bisheng_langchain/chat_models/interface/openai.py +68 -0
  13. bisheng_langchain/chat_models/interface/types.py +61 -0
  14. bisheng_langchain/chat_models/interface/utils.py +5 -0
  15. bisheng_langchain/chat_models/interface/wenxin.py +114 -0
  16. bisheng_langchain/chat_models/interface/xunfei.py +233 -0
  17. bisheng_langchain/chat_models/interface/zhipuai.py +81 -0
  18. bisheng_langchain/chat_models/minimax.py +354 -0
  19. bisheng_langchain/chat_models/proxy_llm.py +354 -0
  20. bisheng_langchain/chat_models/wenxin.py +349 -0
  21. bisheng_langchain/chat_models/xunfeiai.py +355 -0
  22. bisheng_langchain/chat_models/zhipuai.py +379 -0
  23. bisheng_langchain/document_loaders/__init__.py +3 -0
  24. bisheng_langchain/document_loaders/elem_html.py +0 -0
  25. bisheng_langchain/document_loaders/elem_image.py +0 -0
  26. bisheng_langchain/document_loaders/elem_pdf.py +655 -0
  27. bisheng_langchain/document_loaders/parsers/__init__.py +5 -0
  28. bisheng_langchain/document_loaders/parsers/image.py +28 -0
  29. bisheng_langchain/document_loaders/parsers/test_image.py +286 -0
  30. bisheng_langchain/embeddings/__init__.py +7 -0
  31. bisheng_langchain/embeddings/host_embedding.py +133 -0
  32. bisheng_langchain/embeddings/interface/__init__.py +3 -0
  33. bisheng_langchain/embeddings/interface/types.py +23 -0
  34. bisheng_langchain/embeddings/interface/wenxin.py +86 -0
  35. bisheng_langchain/embeddings/wenxin.py +139 -0
  36. bisheng_langchain/vectorstores/__init__.py +3 -0
  37. bisheng_langchain/vectorstores/elastic_keywords_search.py +284 -0
  38. bisheng_langchain-0.0.1.dist-info/METADATA +64 -0
  39. bisheng_langchain-0.0.1.dist-info/RECORD +41 -0
  40. bisheng_langchain-0.0.1.dist-info/WHEEL +5 -0
  41. bisheng_langchain-0.0.1.dist-info/top_level.txt +1 -0
bisheng_langchain/__init__.py (file without changes)
bisheng_langchain/chains/__init__.py
@@ -0,0 +1,5 @@
+ from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain
+
+ __all__ = [
+     'StuffDocumentsChain',
+ ]
bisheng_langchain/chains/combine_documents/__init__.py (file without changes)
bisheng_langchain/chains/combine_documents/stuff.py
@@ -0,0 +1,56 @@
+ from typing import Any, List, Tuple
+
+ from langchain.callbacks.manager import Callbacks
+ from langchain.chains.combine_documents.stuff import StuffDocumentsChain as StuffDocumentsChainOld
+ from langchain.docstore.document import Document
+
+
+ class StuffDocumentsChain(StuffDocumentsChainOld):
+
+     token_max: int = -1
+
+     def combine_docs(self,
+                      docs: List[Document],
+                      callbacks: Callbacks = None,
+                      **kwargs: Any) -> Tuple[str, dict]:
+         """Stuff all documents into one prompt and pass to LLM.
+
+         Args:
+             docs: List of documents to join together into one variable.
+             callbacks: Optional callbacks to pass along.
+             **kwargs: Additional parameters to use to get inputs to LLMChain.
+
+         Returns:
+             The first element returned is the single string output. The second
+             element returned is a dictionary of other keys to return.
+         """
+         inputs = self._get_inputs(docs, **kwargs)
+         if self.token_max > 0:
+             # Truncate the joined context to its first `token_max` characters.
+             inputs[self.document_variable_name] = inputs[
+                 self.document_variable_name][:self.token_max]
+         # Call predict on the LLM.
+         return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
+
+     async def acombine_docs(self,
+                             docs: List[Document],
+                             callbacks: Callbacks = None,
+                             **kwargs: Any) -> Tuple[str, dict]:
+         """Stuff all documents into one prompt and pass to LLM.
+
+         Args:
+             docs: List of documents to join together into one variable.
+             callbacks: Optional callbacks to pass along.
+             **kwargs: Additional parameters to use to get inputs to LLMChain.
+
+         Returns:
+             The first element returned is the single string output. The second
+             element returned is a dictionary of other keys to return.
+         """
+         inputs = self._get_inputs(docs, **kwargs)
+         if self.token_max > 0:
+             # Truncate the joined context to its first `token_max` characters.
+             inputs[self.document_variable_name] = inputs[
+                 self.document_variable_name][:self.token_max]
+         # Call predict on the LLM.
+         return await self.llm_chain.apredict(callbacks=callbacks, **inputs), {}
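Note that `token_max` here truncates the joined context string by characters, not model tokens. A minimal sketch of that behavior, assuming langchain's built-in FakeListLLM as a stand-in model (names below are illustrative, not part of the package):

    from langchain.chains.llm import LLMChain
    from langchain.docstore.document import Document
    from langchain.llms.fake import FakeListLLM
    from langchain.prompts import PromptTemplate

    from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain

    llm_chain = LLMChain(
        llm=FakeListLLM(responses=['stub answer']),
        prompt=PromptTemplate.from_template('Answer using the context:\n{context}'),
    )
    chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_variable_name='context',
        token_max=100,  # keep only the first 100 characters of the joined context
    )
    docs = [Document(page_content='x' * 500)]
    output, _ = chain.combine_docs(docs)  # the prompt sees a 100-character context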
bisheng_langchain/chains/question_answering/__init__.py
@@ -0,0 +1,240 @@
+ """Load question answering chains."""
+ from typing import Any, Mapping, Optional, Protocol
+
+ from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain
+ from langchain.callbacks.base import BaseCallbackManager
+ from langchain.callbacks.manager import Callbacks
+ from langchain.chains import ReduceDocumentsChain
+ from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
+ from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
+ from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain
+ from langchain.chains.combine_documents.refine import RefineDocumentsChain
+ from langchain.chains.llm import LLMChain
+ from langchain.chains.question_answering import map_reduce_prompt, refine_prompts, stuff_prompt
+ from langchain.chains.question_answering.map_rerank_prompt import PROMPT as MAP_RERANK_PROMPT
+ from langchain.schema.language_model import BaseLanguageModel
+ from langchain.schema.prompt_template import BasePromptTemplate
+
+
+ class LoadingCallable(Protocol):
+     """Interface for loading the combine documents chain."""
+
+     def __call__(self, llm: BaseLanguageModel,
+                  **kwargs: Any) -> BaseCombineDocumentsChain:
+         """Callable to load the combine documents chain."""
+
+
+ def _load_map_rerank_chain(
+     llm: BaseLanguageModel,
+     prompt: BasePromptTemplate = MAP_RERANK_PROMPT,
+     verbose: bool = False,
+     document_variable_name: str = 'context',
+     rank_key: str = 'score',
+     answer_key: str = 'answer',
+     callback_manager: Optional[BaseCallbackManager] = None,
+     callbacks: Callbacks = None,
+     **kwargs: Any,
+ ) -> MapRerankDocumentsChain:
+     llm_chain = LLMChain(
+         llm=llm,
+         prompt=prompt,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+     )
+     return MapRerankDocumentsChain(
+         llm_chain=llm_chain,
+         rank_key=rank_key,
+         answer_key=answer_key,
+         document_variable_name=document_variable_name,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         **kwargs,
+     )
+
+
+ def _load_stuff_chain(
+     llm: BaseLanguageModel,
+     prompt: Optional[BasePromptTemplate] = None,
+     document_variable_name: str = 'context',
+     verbose: Optional[bool] = None,
+     callback_manager: Optional[BaseCallbackManager] = None,
+     callbacks: Callbacks = None,
+     **kwargs: Any,
+ ) -> StuffDocumentsChain:
+     _prompt = prompt or stuff_prompt.PROMPT_SELECTOR.get_prompt(llm)
+     llm_chain = LLMChain(
+         llm=llm,
+         prompt=_prompt,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+     )
+     # TODO: document prompt
+     return StuffDocumentsChain(
+         llm_chain=llm_chain,
+         document_variable_name=document_variable_name,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         **kwargs,
+     )
+
+
+ def _load_map_reduce_chain(
+     llm: BaseLanguageModel,
+     question_prompt: Optional[BasePromptTemplate] = None,
+     combine_prompt: Optional[BasePromptTemplate] = None,
+     combine_document_variable_name: str = 'summaries',
+     map_reduce_document_variable_name: str = 'context',
+     collapse_prompt: Optional[BasePromptTemplate] = None,
+     reduce_llm: Optional[BaseLanguageModel] = None,
+     collapse_llm: Optional[BaseLanguageModel] = None,
+     verbose: Optional[bool] = None,
+     callback_manager: Optional[BaseCallbackManager] = None,
+     callbacks: Callbacks = None,
+     token_max: int = 3000,
+     **kwargs: Any,
+ ) -> MapReduceDocumentsChain:
+     _question_prompt = (
+         question_prompt
+         or map_reduce_prompt.QUESTION_PROMPT_SELECTOR.get_prompt(llm))
+     _combine_prompt = (
+         combine_prompt
+         or map_reduce_prompt.COMBINE_PROMPT_SELECTOR.get_prompt(llm))
+     map_chain = LLMChain(
+         llm=llm,
+         prompt=_question_prompt,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+     )
+     _reduce_llm = reduce_llm or llm
+     reduce_chain = LLMChain(
+         llm=_reduce_llm,
+         prompt=_combine_prompt,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+     )
+     # TODO: document prompt
+     combine_documents_chain = StuffDocumentsChain(
+         llm_chain=reduce_chain,
+         document_variable_name=combine_document_variable_name,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+     )
+     if collapse_prompt is None:
+         collapse_chain = None
+         if collapse_llm is not None:
+             raise ValueError(
+                 'collapse_llm provided, but collapse_prompt was not: please '
+                 'provide one or stop providing collapse_llm.')
+     else:
+         _collapse_llm = collapse_llm or llm
+         collapse_chain = StuffDocumentsChain(
+             llm_chain=LLMChain(
+                 llm=_collapse_llm,
+                 prompt=collapse_prompt,
+                 verbose=verbose,
+                 callback_manager=callback_manager,
+                 callbacks=callbacks,
+             ),
+             document_variable_name=combine_document_variable_name,
+             verbose=verbose,
+             callback_manager=callback_manager,
+         )
+     reduce_documents_chain = ReduceDocumentsChain(
+         combine_documents_chain=combine_documents_chain,
+         collapse_documents_chain=collapse_chain,
+         token_max=token_max,
+         verbose=verbose,
+     )
+     return MapReduceDocumentsChain(
+         llm_chain=map_chain,
+         document_variable_name=map_reduce_document_variable_name,
+         reduce_documents_chain=reduce_documents_chain,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+         **kwargs,
+     )
+
+
+ def _load_refine_chain(
+     llm: BaseLanguageModel,
+     question_prompt: Optional[BasePromptTemplate] = None,
+     refine_prompt: Optional[BasePromptTemplate] = None,
+     document_variable_name: str = 'context_str',
+     initial_response_name: str = 'existing_answer',
+     refine_llm: Optional[BaseLanguageModel] = None,
+     verbose: Optional[bool] = None,
+     callback_manager: Optional[BaseCallbackManager] = None,
+     callbacks: Callbacks = None,
+     **kwargs: Any,
+ ) -> RefineDocumentsChain:
+     _question_prompt = (
+         question_prompt
+         or refine_prompts.QUESTION_PROMPT_SELECTOR.get_prompt(llm))
+     _refine_prompt = (
+         refine_prompt
+         or refine_prompts.REFINE_PROMPT_SELECTOR.get_prompt(llm))
+     initial_chain = LLMChain(
+         llm=llm,
+         prompt=_question_prompt,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+     )
+     _refine_llm = refine_llm or llm
+     refine_chain = LLMChain(
+         llm=_refine_llm,
+         prompt=_refine_prompt,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         callbacks=callbacks,
+     )
+     return RefineDocumentsChain(
+         initial_llm_chain=initial_chain,
+         refine_llm_chain=refine_chain,
+         document_variable_name=document_variable_name,
+         initial_response_name=initial_response_name,
+         verbose=verbose,
+         callback_manager=callback_manager,
+         **kwargs,
+     )
+
+
+ def load_qa_chain(
+     llm: BaseLanguageModel,
+     chain_type: str = 'stuff',
+     verbose: Optional[bool] = None,
+     callback_manager: Optional[BaseCallbackManager] = None,
+     **kwargs: Any,
+ ) -> BaseCombineDocumentsChain:
+     """Load question answering chain.
+
+     Args:
+         llm: Language Model to use in the chain.
+         chain_type: Type of document combining chain to use. Should be one of
+             "stuff", "map_reduce", "map_rerank", or "refine".
+         verbose: Whether chains should be run in verbose mode or not. Note that
+             this applies to all chains that make up the final chain.
+         callback_manager: Callback manager to use for the chain.
+
+     Returns:
+         A chain to use for question answering.
+     """
+     loader_mapping: Mapping[str, LoadingCallable] = {
+         'stuff': _load_stuff_chain,
+         'map_reduce': _load_map_reduce_chain,
+         'refine': _load_refine_chain,
+         'map_rerank': _load_map_rerank_chain,
+     }
+     if chain_type not in loader_mapping:
+         raise ValueError(f'Got unsupported chain type: {chain_type}. '
+                          f'Should be one of {loader_mapping.keys()}')
+     return loader_mapping[chain_type](llm,
+                                       verbose=verbose,
+                                       callback_manager=callback_manager,
+                                       **kwargs)
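For reference, a minimal usage sketch of `load_qa_chain` with the 'stuff' chain type; FakeListLLM is a stand-in here, and any `BaseLanguageModel` works:

    from langchain.docstore.document import Document
    from langchain.llms.fake import FakeListLLM

    from bisheng_langchain.chains.question_answering import load_qa_chain

    chain = load_qa_chain(FakeListLLM(responses=['stub answer']), chain_type='stuff')
    docs = [Document(page_content='Some context relevant to the question.')]
    answer = chain.run(input_documents=docs, question='What does the context say?')

Extra keyword arguments (such as `token_max` for the 'stuff' type) are forwarded to the per-type loader.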
bisheng_langchain/chains/retrieval_qa/__init__.py (file without changes)
bisheng_langchain/chains/retrieval_qa/base.py
@@ -0,0 +1,89 @@
+ from typing import List
+
+ from langchain.callbacks.manager import AsyncCallbackManagerForChainRun, CallbackManagerForChainRun
+ from langchain.chains.retrieval_qa.base import BaseRetrievalQA
+ from langchain.pydantic_v1 import Field
+ from langchain.schema import BaseRetriever, Document
+
+
+ class MultiRetrievalQA(BaseRetrievalQA):
+     """Chain for question-answering that combines a vector retriever and a keyword retriever.
+
+     Example:
+         .. code-block:: python
+
+             from langchain.llms import OpenAI
+             from langchain.vectorstores import FAISS
+             from langchain.vectorstores.base import VectorStoreRetriever
+             vector_retriever = VectorStoreRetriever(vectorstore=FAISS(...))
+             keyword_retriever = ...  # e.g. an Elasticsearch keyword retriever
+             qa = MultiRetrievalQA.from_llm(
+                 llm=OpenAI(),
+                 vector_retriever=vector_retriever,
+                 keyword_retriever=keyword_retriever,
+             )
+
+     """
+
+     vector_retriever: BaseRetriever = Field(exclude=True)
+     keyword_retriever: BaseRetriever = Field(exclude=True)
+     # One of: 'keyword_front', 'vector_front', 'mix'.
+     combine_strategy: str = 'keyword_front'
+
+     def _get_docs(
+         self,
+         question: str,
+         *,
+         run_manager: CallbackManagerForChainRun,
+     ) -> List[Document]:
+         """Get docs from both retrievers and combine them."""
+         vector_docs = self.vector_retriever.get_relevant_documents(
+             question, callbacks=run_manager.get_child())
+         keyword_docs = self.keyword_retriever.get_relevant_documents(
+             question, callbacks=run_manager.get_child())
+         if self.combine_strategy == 'keyword_front':
+             return keyword_docs + vector_docs
+         elif self.combine_strategy == 'vector_front':
+             return vector_docs + keyword_docs
+         elif self.combine_strategy == 'mix':
+             # Interleave the two result lists, then append whichever has leftovers.
+             combine_docs = []
+             min_len = min(len(keyword_docs), len(vector_docs))
+             for i in range(min_len):
+                 combine_docs.append(keyword_docs[i])
+                 combine_docs.append(vector_docs[i])
+             combine_docs.extend(keyword_docs[min_len:])
+             combine_docs.extend(vector_docs[min_len:])
+             return combine_docs
+         else:
+             raise ValueError(f'Expected combine_strategy to be one of '
+                              f'(keyword_front, vector_front, mix), '
+                              f'instead found {self.combine_strategy}')
+
+     async def _aget_docs(
+         self,
+         question: str,
+         *,
+         run_manager: AsyncCallbackManagerForChainRun,
+     ) -> List[Document]:
+         """Get docs from both retrievers and combine them (async)."""
+         vector_docs = await self.vector_retriever.aget_relevant_documents(
+             question, callbacks=run_manager.get_child())
+         keyword_docs = await self.keyword_retriever.aget_relevant_documents(
+             question, callbacks=run_manager.get_child())
+         if self.combine_strategy == 'keyword_front':
+             return keyword_docs + vector_docs
+         elif self.combine_strategy == 'vector_front':
+             return vector_docs + keyword_docs
+         elif self.combine_strategy == 'mix':
+             # Interleave the two result lists, then append whichever has leftovers.
+             combine_docs = []
+             min_len = min(len(keyword_docs), len(vector_docs))
+             for i in range(min_len):
+                 combine_docs.append(keyword_docs[i])
+                 combine_docs.append(vector_docs[i])
+             combine_docs.extend(keyword_docs[min_len:])
+             combine_docs.extend(vector_docs[min_len:])
+             return combine_docs
+         else:
+             raise ValueError(f'Expected combine_strategy to be one of '
+                              f'(keyword_front, vector_front, mix), '
+                              f'instead found {self.combine_strategy}')
+
+     @property
+     def _chain_type(self) -> str:
+         """Return the chain type."""
+         return 'multi_retrieval_qa'
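A hedged usage sketch of `MultiRetrievalQA`: the `ListRetriever` below is a hypothetical in-memory retriever written for illustration (not part of the package), and assumes langchain's `BaseRetriever` subclassing contract where only the sync `_get_relevant_documents` hook is required:

    from typing import List

    from langchain.llms.fake import FakeListLLM
    from langchain.schema import BaseRetriever, Document

    from bisheng_langchain.chains.retrieval_qa.base import MultiRetrievalQA


    class ListRetriever(BaseRetriever):
        """Toy retriever that always returns a fixed list of documents."""
        docs: List[Document]

        def _get_relevant_documents(self, query, *, run_manager):
            return self.docs


    qa = MultiRetrievalQA.from_chain_type(
        llm=FakeListLLM(responses=['stub answer']),
        chain_type='stuff',
        vector_retriever=ListRetriever(docs=[Document(page_content='vector hit')]),
        keyword_retriever=ListRetriever(docs=[Document(page_content='keyword hit')]),
        combine_strategy='mix',  # interleave keyword and vector results
    )
    print(qa.run('any question'))

With `combine_strategy='mix'` the keyword and vector hits alternate in the combined list; the front strategies simply concatenate one list ahead of the other.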
bisheng_langchain/chat_models/__init__.py
@@ -0,0 +1,11 @@
+ from .host_llm import BaichuanChat, ChatGLM2Host, Llama2Chat, QwenChat
+ from .minimax import ChatMinimaxAI
+ from .proxy_llm import ProxyChatLLM
+ from .wenxin import ChatWenxin
+ from .xunfeiai import ChatXunfeiAI
+ from .zhipuai import ChatZhipuAI
+
+ __all__ = [
+     'ProxyChatLLM', 'ChatMinimaxAI', 'ChatWenxin', 'ChatZhipuAI',
+     'ChatXunfeiAI', 'Llama2Chat', 'ChatGLM2Host', 'BaichuanChat', 'QwenChat'
+ ]