bisheng-langchain 0.2.2.4__py3-none-any.whl → 0.2.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bisheng_langchain/chains/__init__.py +3 -2
- bisheng_langchain/chains/conversational_retrieval/__init__.py +0 -0
- bisheng_langchain/chains/conversational_retrieval/base.py +115 -0
- bisheng_langchain/chat_models/__init__.py +3 -2
- bisheng_langchain/chat_models/host_llm.py +28 -0
- bisheng_langchain/chat_models/sensetime.py +59 -47
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +13 -7
- {bisheng_langchain-0.2.2.4.dist-info → bisheng_langchain-0.2.2.5.dist-info}/METADATA +1 -1
- {bisheng_langchain-0.2.2.4.dist-info → bisheng_langchain-0.2.2.5.dist-info}/RECORD +11 -9
- {bisheng_langchain-0.2.2.4.dist-info → bisheng_langchain-0.2.2.5.dist-info}/WHEEL +0 -0
- {bisheng_langchain-0.2.2.4.dist-info → bisheng_langchain-0.2.2.5.dist-info}/top_level.txt +0 -0

bisheng_langchain/chains/__init__.py
@@ -1,5 +1,6 @@
 from bisheng_langchain.chains.autogen.auto_gen import AutoGenChain
 from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain
+from bisheng_langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
 from bisheng_langchain.chains.retrieval.retrieval_chain import RetrievalChain
 from bisheng_langchain.chains.router.multi_rule import MultiRuleChain
 from bisheng_langchain.chains.router.rule_router import RuleBasedRouter
@@ -7,6 +8,6 @@ from bisheng_langchain.chains.router.rule_router import RuleBasedRouter
 from .loader_output import LoaderOutputChain
 
 __all__ = [
-    'StuffDocumentsChain', 'LoaderOutputChain', 'AutoGenChain', 'RuleBasedRouter',
-    'RetrievalChain'
+    'StuffDocumentsChain', 'LoaderOutputChain', 'AutoGenChain', 'RuleBasedRouter',
+    'MultiRuleChain', 'RetrievalChain', 'ConversationalRetrievalChain'
 ]
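
Note: with this change the new chain is importable straight from bisheng_langchain.chains. A minimal usage sketch; `llm` and `retriever` are placeholders for objects you have already built, and from_llm is inherited from the LangChain base class rather than defined in this diff:

    from bisheng_langchain.chains import ConversationalRetrievalChain

    # llm and retriever are assumed to exist already (placeholders)
    chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)  # noqa: F821
    result = chain({'question': 'What is Bisheng?', 'chat_history': []})
    print(result['answer'])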

bisheng_langchain/chains/conversational_retrieval/__init__.py
File without changes

bisheng_langchain/chains/conversational_retrieval/base.py
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+import inspect
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from langchain.callbacks.manager import AsyncCallbackManagerForChainRun, CallbackManagerForChainRun
+from langchain.chains.conversational_retrieval.base import \
+    ConversationalRetrievalChain as BaseConversationalRetrievalChain
+from langchain_core.messages import BaseMessage
+
+# Depending on the memory type and configuration, the chat history format may differ.
+# This needs to be consolidated.
+CHAT_TURN_TYPE = Union[Tuple[str, str], BaseMessage]
+
+_ROLE_MAP = {'human': 'Human: ', 'ai': 'Assistant: '}
+
+
+def _get_chat_history(chat_history: List[CHAT_TURN_TYPE]) -> str:
+    buffer = ''
+    for dialogue_turn in chat_history:
+        if isinstance(dialogue_turn, BaseMessage):
+            role_prefix = _ROLE_MAP.get(dialogue_turn.type, f'{dialogue_turn.type}: ')
+            buffer += f'\n{role_prefix}{dialogue_turn.content}'
+        elif isinstance(dialogue_turn, tuple):
+            human = 'Human: ' + dialogue_turn[0]
+            ai = 'Assistant: ' + dialogue_turn[1]
+            buffer += '\n' + '\n'.join([human, ai])
+        else:
+            raise ValueError(f'Unsupported chat history format: {type(dialogue_turn)}.'
+                             f' Full chat history: {chat_history} ')
+    return buffer
+
+
+class ConversationalRetrievalChain(BaseConversationalRetrievalChain):
+    """ConversationalRetrievalChain is a chain you can use to have a conversation with a character from a series."""
+
+    def _call(
+        self,
+        inputs: Dict[str, Any],
+        run_manager: Optional[CallbackManagerForChainRun] = None,
+    ) -> Dict[str, Any]:
+        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
+        question = inputs['question']
+        get_chat_history = self.get_chat_history or _get_chat_history
+        chat_history_str = get_chat_history(inputs['chat_history'])
+
+        if chat_history_str:
+            # callbacks = _run_manager.get_child()
+            new_question = self.question_generator.run(question=question,
+                                                       chat_history=chat_history_str)
+        else:
+            new_question = question
+        accepts_run_manager = ('run_manager' in inspect.signature(self._get_docs).parameters)
+        if accepts_run_manager:
+            docs = self._get_docs(new_question, inputs, run_manager=_run_manager)
+        else:
+            docs = self._get_docs(new_question, inputs)  # type: ignore[call-arg]
+        output: Dict[str, Any] = {}
+        if self.response_if_no_docs_found is not None and len(docs) == 0:
+            output[self.output_key] = self.response_if_no_docs_found
+        else:
+            new_inputs = inputs.copy()
+            if self.rephrase_question:
+                new_inputs['question'] = new_question
+            new_inputs['chat_history'] = chat_history_str
+            answer = self.combine_docs_chain.run(input_documents=docs,
+                                                 callbacks=_run_manager.get_child(),
+                                                 **new_inputs)
+            output[self.output_key] = answer
+
+        if self.return_source_documents:
+            output['source_documents'] = docs
+        if self.return_generated_question:
+            output['generated_question'] = new_question
+        return output
+
+    async def _acall(
+        self,
+        inputs: Dict[str, Any],
+        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
+    ) -> Dict[str, Any]:
+        _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
+        question = inputs['question']
+        get_chat_history = self.get_chat_history or _get_chat_history
+        chat_history_str = get_chat_history(inputs['chat_history'])
+        if chat_history_str:
+            # callbacks = _run_manager.get_child()
+            new_question = await self.question_generator.arun(question=question,
+                                                              chat_history=chat_history_str)
+        else:
+            new_question = question
+        accepts_run_manager = ('run_manager' in inspect.signature(self._aget_docs).parameters)
+        if accepts_run_manager:
+            docs = await self._aget_docs(new_question, inputs, run_manager=_run_manager)
+        else:
+            docs = await self._aget_docs(new_question, inputs)  # type: ignore[call-arg]
+
+        output: Dict[str, Any] = {}
+        if self.response_if_no_docs_found is not None and len(docs) == 0:
+            output[self.output_key] = self.response_if_no_docs_found
+        else:
+            new_inputs = inputs.copy()
+            if self.rephrase_question:
+                new_inputs['question'] = new_question
+            new_inputs['chat_history'] = chat_history_str
+            answer = await self.combine_docs_chain.arun(input_documents=docs,
+                                                        callbacks=_run_manager.get_child(),
+                                                        **new_inputs)
+            output[self.output_key] = answer
+
+        if self.return_source_documents:
+            output['source_documents'] = docs
+        if self.return_generated_question:
+            output['generated_question'] = new_question
+        return output
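
Note: the module-private _get_chat_history helper above accepts both history shapes named by CHAT_TURN_TYPE. A small illustration (importing a private helper purely for demonstration):

    from langchain_core.messages import AIMessage, HumanMessage

    from bisheng_langchain.chains.conversational_retrieval.base import _get_chat_history

    history = [
        ('Hi, what is Bisheng?', 'An LLM application platform.'),  # (human, ai) tuple
        HumanMessage(content='Does it support retrieval?'),        # BaseMessage turns
        AIMessage(content='Yes, via its retrieval chains.'),
    ]
    # Each turn is flattened onto its own line with a 'Human: ' or 'Assistant: '
    # prefix; unknown message types fall back to '<type>: '.
    print(_get_chat_history(history))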

bisheng_langchain/chat_models/__init__.py
@@ -1,4 +1,4 @@
-from .host_llm import CustomLLMChat, HostBaichuanChat, HostChatGLM, HostLlama2Chat, HostQwenChat
+from .host_llm import CustomLLMChat, HostBaichuanChat, HostChatGLM, HostLlama2Chat, HostQwenChat, HostYuanChat, HostYiChat
 from .minimax import ChatMinimaxAI
 from .proxy_llm import ProxyChatLLM
 from .qwen import ChatQWen
@@ -9,5 +9,6 @@ from .sensetime import SenseChat
 
 __all__ = [
     'ProxyChatLLM', 'ChatMinimaxAI', 'ChatWenxin', 'ChatZhipuAI', 'ChatXunfeiAI', 'HostChatGLM',
-    'HostBaichuanChat', 'HostLlama2Chat', 'HostQwenChat', 'CustomLLMChat', 'ChatQWen', 'SenseChat'
+    'HostBaichuanChat', 'HostLlama2Chat', 'HostQwenChat', 'CustomLLMChat', 'ChatQWen', 'SenseChat',
+    'HostYuanChat', 'HostYiChat'
 ]

bisheng_langchain/chat_models/host_llm.py
@@ -535,3 +535,31 @@ class CustomLLMChat(BaseHostChatLLM):
     def _llm_type(self) -> str:
         """Return type of chat model."""
         return 'custom_llm_chat'
+
+class HostYuanChat(BaseHostChatLLM):
+    # use custom llm chat api; the API should be compatible with the OpenAI definition
+    model_name: str = Field('Yuan2-2B-Janus-hf', alias='model')
+
+    temperature: float = 1
+    top_p: float = 0.9
+    max_tokens: int = 4096
+    host_base_url: str
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of chat model."""
+        return 'yuan2'
+
+class HostYiChat(BaseHostChatLLM):
+    # use custom llm chat api; the API should be compatible with the OpenAI definition
+    model_name: str = Field('Yi-34B-Chat', alias='model')
+
+    temperature: float = 0.6
+    top_p: float = 0.8
+    max_tokens: int = 4096
+    host_base_url: str
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of chat model."""
+        return 'yi_chat'
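
Note: the two new wrappers only declare the fields visible above (model via the model_name alias, temperature, top_p, max_tokens, and a required host_base_url); everything else comes from BaseHostChatLLM and is not shown in this diff. A hedged construction sketch with placeholder endpoints:

    from bisheng_langchain.chat_models import HostYiChat, HostYuanChat

    # host_base_url is required; the URLs below are hypothetical
    yuan = HostYuanChat(host_base_url='http://127.0.0.1:8000/v1')  # defaults to Yuan2-2B-Janus-hf
    yi = HostYiChat(
        model='Yi-34B-Chat',  # populates model_name through the Field(..., alias='model') alias
        temperature=0.6,
        host_base_url='http://127.0.0.1:8000/v1',
    )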

bisheng_langchain/chat_models/sensetime.py
@@ -1,8 +1,12 @@
 from __future__ import annotations
 
+import json
 import logging
-import
-from typing import
+import time
+from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
+
+import jwt
+from bisheng_langchain.utils.requests import Requests
 from langchain.callbacks.manager import AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun
 from langchain.chat_models.base import BaseChatModel
 from langchain.schema import ChatGeneration, ChatResult
@@ -12,19 +16,13 @@ from langchain.utils import get_from_dict_or_env
 from langchain_core.pydantic_v1 import Field, root_validator
 from tenacity import (before_sleep_log, retry, retry_if_exception_type, stop_after_attempt,
                       wait_exponential)
-from bisheng_langchain.utils.requests import Requests
-
-import time
-import requests
-import json
-
 
-import jwt
 # if TYPE_CHECKING:
 #     import jwt
 
 logger = logging.getLogger(__name__)
 
+
 def _import_pyjwt() -> Any:
     try:
         import jwt
@@ -34,19 +32,18 @@ def _import_pyjwt() -> Any:
                           'Please install it with `pip install PyJWT`.')
    return jwt
 
+
 def encode_jwt_token(ak, sk):
-    headers = {
-        "alg": "HS256",
-        "typ": "JWT"
-    }
+    headers = {'alg': 'HS256', 'typ': 'JWT'}
     payload = {
-
-
-
+        'iss': ak,
+        'exp': int(time.time()) + 18000,  # set your desired expiry; this example is the current time + 300 minutes
+        'nbf': int(time.time()) - 500  # set your desired start of validity; this example is the current time - 500 seconds
     }
     token = jwt.encode(payload, sk, headers=headers)
     return token
 
+
 def _create_retry_decorator(llm):
 
     min_seconds = 1
@@ -67,7 +64,8 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
     if role == 'user':
         return HumanMessage(content=_dict['content'])
     elif role == 'assistant':
-        content = _dict['content']
+        content = _dict['content'] or ''  # OpenAI returns None for tool invocations
+
         if _dict.get('function_call'):
             additional_kwargs = {'function_call': dict(_dict['function_call'])}
         else:
@@ -119,7 +117,9 @@ def _convert_message_to_dict2(message: BaseMessage) -> List[dict]:
 
     return [message_dict]
 
-
+
+url = 'https://api.sensenova.cn/v1/llm/chat-completions'
+
 
 class SenseChat(BaseChatModel):
 
@@ -164,22 +164,23 @@ class SenseChat(BaseChatModel):
 
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
-
         """Validate that api key and python package exists in environment."""
 
         _import_pyjwt()
 
-        values['access_key_id'] = get_from_dict_or_env(values, 'access_key_id',
-                                                       'ACCESS_KEY_ID')
+        values['access_key_id'] = get_from_dict_or_env(values, 'access_key_id', 'ACCESS_KEY_ID')
         values['secret_access_key'] = get_from_dict_or_env(values, 'secret_access_key',
-
+                                                           'SECRET_ACCESS_KEY')
         token = encode_jwt_token(values['access_key_id'], values['secret_access_key'])
         if isinstance(token, bytes):
             token = token.decode('utf-8')
 
         try:
-            header = {
-
+            header = {
+                'Authorization': 'Bearer {}'.format(token),
+                'Content-Type': 'application/json'
+            }
+
             values['client'] = Requests(headers=header, )
         except AttributeError:
             raise ValueError('Try upgrading it with `pip install --upgrade requests`.')
@@ -212,8 +213,8 @@
             'temperature': temperature,
             'repetition_penalty': self.repetition_penalty,
             'n': self.n,
-
-            'stream': False#self.streaming
+            'max_new_tokens': self.max_tokens,
+            'stream': False  # self.streaming
         }
 
         token = encode_jwt_token(self.access_key_id, self.secret_access_key)
@@ -223,6 +224,7 @@
 
             response = self.client.post(url=url, json=params).json()
             return response
+
         rsp_dict = _completion_with_retry(**kwargs)
         if 'error' in rsp_dict:
             logger.error(f'sensechat_error resp={rsp_dict}')
@@ -232,16 +234,10 @@
         # return rsp_dict['data'], rsp_dict.get('usage', '')
         return rsp_dict, rsp_dict.get('usage', '')
 
-
     async def acompletion_with_retry(self, **kwargs: Any) -> Any:
         """Use tenacity to retry the async completion call."""
         retry_decorator = _create_retry_decorator(self)
 
-        token = encode_jwt_token(self.access_key_id, self.secret_access_key)
-        if isinstance(token, bytes):
-            token = token.decode('utf-8')
-        self.client.headers.update({'Authorization': 'Bearer {}'.format(token)})
-
         if self.streaming:
             self.client.headers.update({'Accept': 'text/event-stream'})
         else:
@@ -250,13 +246,23 @@
         @retry_decorator
         async def _acompletion_with_retry(**kwargs: Any) -> Any:
             messages = kwargs.pop('messages', '')
-
-
-
-
+
+            inp = {
+                'messages': messages,
+                'model': self.model_name,
+                'top_p': self.top_p,
+                'temperature': self.temperature,
+                'repetition_penalty': self.repetition_penalty,
+                'n': self.n,
+                'max_new_tokens': self.max_tokens,
+                'stream': True
+            }
+
             # Use OpenAI's async api https://github.com/openai/openai-python#async-api
             async with self.client.apost(url=url, json=inp) as response:
+
                 async for line in response.content.iter_any():
+
                     if b'\n' in line:
                         for txt_ in line.split(b'\n'):
                             yield txt_.decode('utf-8').strip()
@@ -315,26 +321,32 @@
         message_dicts, params = self._create_message_dicts(messages, stop)
         params = {**params, **kwargs}
         if self.streaming:
+
             inner_completion = ''
-            role = '
+            role = 'user'
             params['stream'] = True
             function_call: Optional[dict] = None
             async for is_error, stream_resp in self.acompletion_with_retry(messages=message_dicts,
                                                                            **params):
-
-
+                if str(stream_resp).startswith('[DONE]'):
+                    continue
+                output = json.loads(stream_resp)
                 if is_error:
                     logger.error(stream_resp)
                     raise ValueError(stream_resp)
-                if 'data' in
-                output =
-
+                if 'data' in output:
+                    output = output['data']
+
+                choices = None
+                if 'choices' in output:
+                    choices = output.get('choices')
+
                 if choices:
                     for choice in choices:
-
-
+                        token = choice['delta']
+
                         inner_completion += token or ''
-                        _function_call =
+                        _function_call = ''
                         if run_manager:
                             await run_manager.on_llm_new_token(token)
                         if _function_call:
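
Note: the reworked streaming loop above skips '[DONE]' sentinel lines, JSON-decodes each event, unwraps the optional top-level 'data' object, and reads each choice's 'delta' as the next token. A simplified, self-contained sketch of that per-line handling (not the package's code; the sample event shape is illustrative):

    import json

    def deltas_from_stream_line(line: str) -> list:
        """Extract delta tokens from one streamed line, mirroring the logic above."""
        if line.startswith('[DONE]'):
            return []
        output = json.loads(line)
        if 'data' in output:
            output = output['data']   # SenseNova may wrap choices in a 'data' object
        choices = output.get('choices') or []
        return [choice['delta'] for choice in choices]

    print(deltas_from_stream_line('{"data": {"choices": [{"delta": "Hel"}]}}'))  # ['Hel']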
@@ -376,16 +388,16 @@
     def _create_chat_result(self, response: Mapping[str, Any]) -> ChatResult:
         generations = []
 
-        # print('response', response)
         def _norm_text(text):
             if text[0] == '"' and text[-1] == '"':
                 out = eval(text)
             else:
                 out = text
             return out
+
         for res in response['data']['choices']:
             res['content'] = _norm_text(res['message'])
-            res[
+            res['role'] = 'user'
             message = _convert_dict_to_message(res)
             gen = ChatGeneration(message=message)
             generations.append(gen)

bisheng_langchain/document_loaders/elem_unstrcutured_loader.py
@@ -63,7 +63,8 @@ class ElemUnstructuredLoader(BasePDFLoader):
                  unstructured_api_url: str = None,
                  start: int = 0,
                  n: int = None,
-                 verbose: bool = False
+                 verbose: bool = False,
+                 kwargs: dict = {}) -> None:
         """Initialize with a file path."""
         self.unstructured_api_url = unstructured_api_url
         self.unstructured_api_key = unstructured_api_key
@@ -71,18 +72,18 @@ class ElemUnstructuredLoader(BasePDFLoader):
         self.file_name = file_name
         self.start = start
         self.n = n
+        self.extra_kwargs = kwargs
         super().__init__(file_path)
 
     def load(self) -> List[Document]:
         """Load given path as pages."""
         b64_data = base64.b64encode(open(self.file_path, 'rb').read()).decode()
+        parameters = {'start': self.start, 'n': self.n}
+        parameters.update(self.extra_kwargs)
         payload = dict(filename=os.path.basename(self.file_name),
                        b64_data=[b64_data],
                        mode='partition',
-                       parameters={
-                       'start': self.start,
-                       'n': self.n
-                       })
+                       parameters=parameters)
 
         resp = requests.post(self.unstructured_api_url, headers=self.headers, json=payload).json()
 
@@ -112,18 +113,23 @@ class ElemUnstructuredLoaderV0(BasePDFLoader):
                  unstructured_api_url: str = None,
                  start: int = 0,
                  n: int = None,
-                 verbose: bool = False
+                 verbose: bool = False,
+                 kwargs: dict = {}) -> None:
         """Initialize with a file path."""
         self.unstructured_api_url = unstructured_api_url
         self.unstructured_api_key = unstructured_api_key
+        self.start = start
+        self.n = n
         self.headers = {'Content-Type': 'application/json'}
         self.file_name = file_name
+        self.extra_kwargs = kwargs
         super().__init__(file_path)
 
     def load(self) -> List[Document]:
         b64_data = base64.b64encode(open(self.file_path, 'rb').read()).decode()
         payload = dict(filename=os.path.basename(self.file_name), b64_data=[b64_data], mode='text')
-
+        payload.update({'start': self.start, 'n': self.n})
+        payload.update(self.extra_kwargs)
         resp = requests.post(self.unstructured_api_url, headers=self.headers, json=payload).json()
 
         if 200 != resp.get('status_code'):

{bisheng_langchain-0.2.2.4.dist-info → bisheng_langchain-0.2.2.5.dist-info}/RECORD
@@ -12,24 +12,26 @@ bisheng_langchain/autogen_role/assistant.py,sha256=VGCoxJaRxRG6ZIJa2TsxcLZbMbF4K
 bisheng_langchain/autogen_role/custom.py,sha256=8xxtAzNF_N1fysyChynVD19t659Qvtcyj_LNiOrE7ew,2499
 bisheng_langchain/autogen_role/groupchat_manager.py,sha256=O9XIove5yzyF_g3K5DnF-Fasdx0sUrRWMogYgEDYJAI,2314
 bisheng_langchain/autogen_role/user.py,sha256=lISbJN5yFsUXHnDCUwr5t6R8O8K3dOMspH4l4_kITnE,5885
-bisheng_langchain/chains/__init__.py,sha256=
+bisheng_langchain/chains/__init__.py,sha256=bZXTCzBbsaU9ks90SU5T2u2py006sArwKZJgCc8BNn8,679
 bisheng_langchain/chains/loader_output.py,sha256=02ZercAFaudStTZ4t7mcVkGRj5pD78HZ6NO8HbmbDH8,1903
 bisheng_langchain/chains/autogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/autogen/auto_gen.py,sha256=QIkfCO9-VN2wRkl3_TWVj-JkdL2dqMQNy93j3uB401s,3270
 bisheng_langchain/chains/combine_documents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/combine_documents/stuff.py,sha256=z_E_wfhJrAYWcNVRPomPm5fGRDI3hqoC52wcMzgzxVA,2369
+bisheng_langchain/chains/conversational_retrieval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bisheng_langchain/chains/conversational_retrieval/base.py,sha256=XiqBqov6No-wTVCou6qyMT5p2JQgoQI7OLQOYH8XUos,5313
 bisheng_langchain/chains/question_answering/__init__.py,sha256=_gOZMc-SWprK6xc-Jj64jcr9nc-G4YkZbEYwfJNq_bY,8795
 bisheng_langchain/chains/retrieval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/retrieval/retrieval_chain.py,sha256=7VLJ-IPVjKfmAVgVET4cvKCO9DCMxwsGgVhW-wz5RZM,3050
 bisheng_langchain/chains/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/router/multi_rule.py,sha256=BiFryj3-7rOxfttD-MyOkKWLCSGB9LVYd2rjOsIfQC8,375
 bisheng_langchain/chains/router/rule_router.py,sha256=R2YRUnwn7s_7DbsSn27uPn4cIV0D-5iXEORXir0tNGM,1835
-bisheng_langchain/chat_models/__init__.py,sha256=
-bisheng_langchain/chat_models/host_llm.py,sha256=
+bisheng_langchain/chat_models/__init__.py,sha256=7NuGJAUgeCF9yDqe7D3Yw69_5COlsExg811TSDErpps,599
+bisheng_langchain/chat_models/host_llm.py,sha256=zbrWUf9Vvc9_8dlVLPoY7Cm0NL7WhE9DSd5F7xYhY2A,22420
 bisheng_langchain/chat_models/minimax.py,sha256=JLs_f6vWD9beZYUtjD4FG28G8tZHrGUAWOwdLIuJomw,13901
 bisheng_langchain/chat_models/proxy_llm.py,sha256=wzVBZik9WC3-f7kyQ1eu3Ooibqpcocln08knf5lV1Nw,17082
 bisheng_langchain/chat_models/qwen.py,sha256=jGx_tW-LPxfegE6NvY6wID8ps2SsP813atjXnc04C-s,18841
-bisheng_langchain/chat_models/sensetime.py,sha256=
+bisheng_langchain/chat_models/sensetime.py,sha256=fuQ5yYGO5F7o7iQ7us17MlL4TAWRRFCCpNN9bAF-ydc,17056
 bisheng_langchain/chat_models/wenxin.py,sha256=OBXmFWkUWZMu1lUz6hPAEawsbAcdgMWcm9WkJJLZyng,13671
 bisheng_langchain/chat_models/xunfeiai.py,sha256=Yz09-I8u6XhGVnT5mdel15Z3CCQZqApJkgnaxyiZNFk,14037
 bisheng_langchain/chat_models/zhipuai.py,sha256=KokWmDDwljsV2iFiRXZlylIaQRw4jDOq5aCnat53wnQ,14887
@@ -46,7 +48,7 @@ bisheng_langchain/document_loaders/custom_kv.py,sha256=sUKeK0e8-cCmKyj1FsR7SzBNW
 bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_pdf.py,sha256=K-TXILGNFLFjavhun_MFbUF4t2_WGA3Z-kbnr75lmW8,22243
-bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=
+bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=bJQObxHnk8FaF8RUBkqODzgeikrZ8wdl_TQPa2oEoQo,5169
 bisheng_langchain/document_loaders/universal_kv.py,sha256=dJF_GQGKBMUjB_kX9CSp7xZRhXgwVuGPbMIzJwPh-C0,4063
 bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
 bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=B4Dea8xXXnGvB9j2OXv53HILNUmnWeNJz9ssNM-2fLM,1760
@@ -70,7 +72,7 @@ bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHn
 bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=gt_uw_fSMcEZWxbiA3V0RyA-utLOZlUY-qxdwnsfZks,12664
 bisheng_langchain/vectorstores/milvus.py,sha256=44ZbDsIxdsbUnHOpEpCdrW5zvWnYvDdAVoDKjCFoyYI,34424
 bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
-bisheng_langchain-0.2.2.
-bisheng_langchain-0.2.2.
-bisheng_langchain-0.2.2.
-bisheng_langchain-0.2.2.
+bisheng_langchain-0.2.2.5.dist-info/METADATA,sha256=zPkSGdTgG56TrcN9xLWKEv02k_Wtg2WRqkCa1yoigtE,2299
+bisheng_langchain-0.2.2.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+bisheng_langchain-0.2.2.5.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+bisheng_langchain-0.2.2.5.dist-info/RECORD,,

{bisheng_langchain-0.2.2.4.dist-info → bisheng_langchain-0.2.2.5.dist-info}/WHEEL
File without changes

{bisheng_langchain-0.2.2.4.dist-info → bisheng_langchain-0.2.2.5.dist-info}/top_level.txt
File without changes