bisheng-langchain 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bisheng_langchain/__init__.py +0 -0
- bisheng_langchain/chains/__init__.py +5 -0
- bisheng_langchain/chains/combine_documents/__init__.py +0 -0
- bisheng_langchain/chains/combine_documents/stuff.py +56 -0
- bisheng_langchain/chains/question_answering/__init__.py +240 -0
- bisheng_langchain/chains/retrieval_qa/__init__.py +0 -0
- bisheng_langchain/chains/retrieval_qa/base.py +89 -0
- bisheng_langchain/chat_models/__init__.py +11 -0
- bisheng_langchain/chat_models/host_llm.py +409 -0
- bisheng_langchain/chat_models/interface/__init__.py +10 -0
- bisheng_langchain/chat_models/interface/minimax.py +123 -0
- bisheng_langchain/chat_models/interface/openai.py +68 -0
- bisheng_langchain/chat_models/interface/types.py +61 -0
- bisheng_langchain/chat_models/interface/utils.py +5 -0
- bisheng_langchain/chat_models/interface/wenxin.py +114 -0
- bisheng_langchain/chat_models/interface/xunfei.py +233 -0
- bisheng_langchain/chat_models/interface/zhipuai.py +81 -0
- bisheng_langchain/chat_models/minimax.py +354 -0
- bisheng_langchain/chat_models/proxy_llm.py +354 -0
- bisheng_langchain/chat_models/wenxin.py +349 -0
- bisheng_langchain/chat_models/xunfeiai.py +355 -0
- bisheng_langchain/chat_models/zhipuai.py +379 -0
- bisheng_langchain/document_loaders/__init__.py +3 -0
- bisheng_langchain/document_loaders/elem_html.py +0 -0
- bisheng_langchain/document_loaders/elem_image.py +0 -0
- bisheng_langchain/document_loaders/elem_pdf.py +655 -0
- bisheng_langchain/document_loaders/parsers/__init__.py +5 -0
- bisheng_langchain/document_loaders/parsers/image.py +28 -0
- bisheng_langchain/document_loaders/parsers/test_image.py +286 -0
- bisheng_langchain/embeddings/__init__.py +7 -0
- bisheng_langchain/embeddings/host_embedding.py +133 -0
- bisheng_langchain/embeddings/interface/__init__.py +3 -0
- bisheng_langchain/embeddings/interface/types.py +23 -0
- bisheng_langchain/embeddings/interface/wenxin.py +86 -0
- bisheng_langchain/embeddings/wenxin.py +139 -0
- bisheng_langchain/vectorstores/__init__.py +3 -0
- bisheng_langchain/vectorstores/elastic_keywords_search.py +284 -0
- bisheng_langchain-0.0.1.dist-info/METADATA +64 -0
- bisheng_langchain-0.0.1.dist-info/RECORD +41 -0
- bisheng_langchain-0.0.1.dist-info/WHEEL +5 -0
- bisheng_langchain-0.0.1.dist-info/top_level.txt +1 -0
bisheng_langchain/chat_models/interface/wenxin.py
@@ -0,0 +1,114 @@
import json

import requests

from .types import ChatInput, ChatOutput, Choice, Message, Usage
from .utils import get_ts


def get_access_token(api_key, sec_key):
    url = (f'https://aip.baidubce.com/oauth/2.0/token?'
           f'grant_type=client_credentials'
           f'&client_id={api_key}&client_secret={sec_key}')

    payload = json.dumps('')
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    response = requests.request('POST', url, headers=headers, data=payload)
    return response.json().get('access_token')


class ChatCompletion(object):

    def __init__(self, api_key, sec_key, **kwargs):
        self.api_key = api_key
        self.sec_key = sec_key
        self.ep_url = ('https://aip.baidubce.com/rpc/2.0/ai_custom/v1/'
                       'wenxinworkshop/chat/completions')
        self.ep_url_turbo = ('https://aip.baidubce.com/rpc/2.0/ai_custom/v1/'
                             'wenxinworkshop/chat/eb-instant')

        # token = get_access_token(api_key, sec_key)
        # self.endpoint = f"{self.ep_url}?access_token={token}"
        self.headers = {'Content-Type': 'application/json'}

    def __call__(self, inp: ChatInput, verbose=False):
        messages = inp.messages
        model = inp.model
        top_p = 0.8 if inp.top_p is None else inp.top_p
        temperature = 0.95 if inp.temperature is None else inp.temperature
        stream = False if inp.stream is None else inp.stream
        # max_tokens = 1024 if inp.max_tokens is None else inp.max_tokens

        system_content = ''
        new_messages = []
        for m in messages:
            role = m.role
            if role == 'system':
                system_content = m.content
                continue
            new_messages.append({'role': role, 'content': m.content})

        if system_content:
            new_messages[-1]['content'] = system_content + '\n' + new_messages[
                -1]['content']

        payload = {
            'stream': stream,
            'messages': new_messages,
            'temperature': temperature,
            'top_p': top_p
        }

        if verbose:
            print('payload', payload)

        token = get_access_token(self.api_key, self.sec_key)
        endpoint = f'{self.ep_url}?access_token={token}'
        if model == 'ernie-bot-turbo':
            endpoint = f'{self.ep_url_turbo}?access_token={token}'

        response = requests.post(endpoint, headers=self.headers, json=payload)

        req_type = 'chat.completion'
        status_message = 'success'
        status_code = response.status_code
        created = get_ts()
        choices = []
        usage = None
        if status_code == 200:
            try:
                info = json.loads(response.text)
                status_code = info.get('error_code', 200)
                status_message = info.get('error_msg', status_message)
                if status_code == 200:
                    created = info['created']
                    result = info['result']
                    finish_reason = 'default'
                    msg = Message(role='assistant', content=result)
                    choices = [
                        Choice(index=0,
                               message=msg,
                               finish_reason=finish_reason)
                    ]
                    usage = Usage(**info['usage'])
            except Exception as e:
                status_code = 401
                status_message = str(e)
        else:
            status_code = 400
            status_message = 'requests error'

        if status_code != 200:
            raise Exception(status_message)

        return ChatOutput(status_code=status_code,
                          status_message=status_message,
                          model=model,
                          object=req_type,
                          created=created,
                          choices=choices,
                          usage=usage)
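Each interface module in this release exposes the same call shape: build a ChatInput, invoke the vendor's ChatCompletion, and read the first Choice off the returned ChatOutput. A minimal usage sketch for the wenxin interface above, assuming ChatInput and Message accept the keyword fields this module reads (inp.messages, inp.model, inp.top_p, inp.temperature, inp.stream; types.py is not shown in this excerpt) and that real Baidu credentials are supplied:

# Hedged sketch: field names and defaults are inferred from how wenxin.py
# reads ChatInput; the credential strings are placeholders.
from bisheng_langchain.chat_models.interface.types import ChatInput, Message
from bisheng_langchain.chat_models.interface.wenxin import ChatCompletion

chat = ChatCompletion(api_key='YOUR_API_KEY', sec_key='YOUR_SECRET_KEY')
inp = ChatInput(
    model='ernie-bot-turbo',  # routes the call to the eb-instant endpoint
    messages=[
        Message(role='system', content='You are a helpful assistant.'),
        Message(role='user', content='Hello'),
    ],
    temperature=0.95,  # module default when None
    top_p=0.8,         # module default when None
    stream=False,
)
out = chat(inp, verbose=True)  # raises Exception on any non-200 status
print(out.choices[0].message.content)

Note that system messages are not sent as-is: the module folds the system content into the last user message before posting, since the wenxinworkshop endpoint takes only user/assistant turns.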
bisheng_langchain/chat_models/interface/xunfei.py
@@ -0,0 +1,233 @@
import base64
import hashlib
import hmac
import json
from datetime import datetime
from time import mktime
from urllib.parse import urlencode, urlparse
from wsgiref.handlers import format_date_time

import websocket
from websocket import create_connection

import _thread as thread

from .types import ChatInput, ChatOutput, Choice, Message, Usage
from .utils import get_ts

# import ssl
# import threading


class Ws_Param(object):
    # Initialization
    def __init__(self, APPID, APIKey, APISecret, gpt_url):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.host = urlparse(gpt_url).netloc
        self.path = urlparse(gpt_url).path
        self.gpt_url = gpt_url

    # Generate the request URL
    def create_url(self):
        # Build an RFC 1123 timestamp
        now = datetime.now()
        date = format_date_time(mktime(now.timetuple()))

        # Concatenate the string to be signed
        signature_origin = 'host: ' + self.host + '\n'
        signature_origin += 'date: ' + date + '\n'
        signature_origin += 'GET ' + self.path + ' HTTP/1.1'

        # Sign it with HMAC-SHA256
        signature_sha = hmac.new(self.APISecret.encode('utf-8'),
                                 signature_origin.encode('utf-8'),
                                 digestmod=hashlib.sha256).digest()

        signature_sha_base64 = base64.b64encode(signature_sha).decode(
            encoding='utf-8')

        authorization_origin = (
            f'api_key="{self.APIKey}", '
            f'algorithm="hmac-sha256", headers="host date request-line",'
            f' signature="{signature_sha_base64}"')

        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')).decode(encoding='utf-8')

        # Collect the auth parameters into a dict
        v = {'authorization': authorization, 'date': date, 'host': self.host}
        # Append the auth parameters to build the connection URL
        url = self.gpt_url + '?' + urlencode(v)
        # The connection URL can be printed here; when following this demo,
        # compare the URL generated for the same parameters against the one
        # your own code produces
        return url


# Handle a websocket error
def on_error(ws, error):
    print('### error:', error)


# Handle the websocket closing
def on_close(ws):
    print('### closed ###')


# Handle the websocket connection being established
def on_open(ws):
    thread.start_new_thread(run, (ws, ))


def run(ws, *args):
    data = json.dumps(gen_params(appid=ws.appid, question=ws.question))
    ws.send(data)


# Handle an incoming websocket message
def on_message(ws, message):
    print(message)
    data = json.loads(message)
    code = data['header']['code']
    if code != 0:
        print(f'Request error: {code}, {data}')
        ws.close()
    else:
        choices = data['payload']['choices']
        status = choices['status']
        content = choices['text'][0]['content']
        print(content, end='')
        if status == 2:
            ws.close()


def gen_params(appid, question):
    data = {
        'header': {
            'app_id': appid,
            'uid': '1234'
        },
        'parameter': {
            'chat': {
                'domain': 'general',
                'random_threshold': 0.5,
                'max_tokens': 2048,
                'auditing': 'default'
            }
        },
        'payload': {
            'message': {
                'text': [{
                    'role': 'user',
                    'content': question
                }]
            }
        }
    }
    return data


class ChatCompletion(object):

    def __init__(self, appid, api_key, api_secret, **kwargs):
        gpt_url = 'ws://spark-api.xf-yun.com/v1.1/chat'
        self.wsParam = Ws_Param(appid, api_key, api_secret, gpt_url)
        websocket.enableTrace(False)
        # wsUrl = wsParam.create_url()

        # todo: modify to the ws pool
        # self.mutex = threading.Lock()
        # self.ws = websocket.WebSocket()
        # self.ws.connect(wsUrl)

        self.header = {'app_id': appid, 'uid': 'elem'}

    def __call__(self, inp: ChatInput, verbose=False):
        messages = inp.messages
        model = inp.model
        # top_p = 0.7 if inp.top_p is None else inp.top_p
        temperature = 0.5 if inp.temperature is None else inp.temperature
        # stream = False if inp.stream is None else inp.stream
        max_tokens = 1024 if inp.max_tokens is None else inp.max_tokens
        # stop = None
        # if inp.stop is not None:
        #     stop = inp.stop.split('||')

        new_messages = []
        for m in messages:
            role = m.role
            if role == 'system':
                role = 'user'
            new_messages.append({'role': role, 'content': m.content})

        created = get_ts()
        payload = {
            'header': self.header,
            'payload': {
                'message': {
                    'text': new_messages
                }
            },
            'parameter': {
                'chat': {
                    'domain': 'general',
                    'temperature': temperature,
                    'max_tokens': max_tokens,
                    'auditing': 'default'
                }
            }
        }

        if verbose:
            print('payload', payload)

        req_type = 'chat.completion'
        status_code = 200
        status_message = 'success'
        choices = []
        usage = None
        texts = []
        ws = None
        try:
            # self.mutex.acquire()
            wsUrl = self.wsParam.create_url()
            ws = create_connection(wsUrl)
            ws.send(json.dumps(payload))
            while True:
                raw_data = ws.recv()
                if not raw_data:
                    break

                resp = json.loads(raw_data)
                if resp['header']['code'] == 0:
                    texts.append(
                        resp['payload']['choices']['text'][0]['content'])
                if resp['header']['code'] == 0 and resp['header'][
                        'status'] == 2:
                    usage_dict = resp['payload']['usage']['text']
                    usage_dict.pop('question_tokens')
                    usage = Usage(**usage_dict)
        except Exception as e:
            status_code = 401
            status_message = str(e)
        finally:
            if ws:
                ws.close()

        if texts:
            finish_reason = 'default'
            msg = Message(role='assistant', content=''.join(texts))
            cho = Choice(index=0, message=msg, finish_reason=finish_reason)
            choices.append(cho)

        if status_code != 200:
            raise Exception(status_message)

        return ChatOutput(status_code=status_code,
                          status_message=status_message,
                          model=model,
                          object=req_type,
                          created=created,
                          choices=choices,
                          usage=usage)
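Unlike the HTTP-based wenxin interface, the Spark client opens a fresh WebSocket per call and authenticates through the signed query string that Ws_Param.create_url builds: an HMAC-SHA256 signature over the host, date, and request line, base64-wrapped into an authorization parameter. A standalone sketch that exercises create_url offline with placeholder credentials (no network traffic, dummy values throughout) to show the three query parameters the handshake carries:

# Hedged sketch: inspects the URL produced by Ws_Param.create_url; the
# credential strings are placeholders, not real keys.
from urllib.parse import parse_qs, urlparse

from bisheng_langchain.chat_models.interface.xunfei import Ws_Param

param = Ws_Param(APPID='dummy-appid',
                 APIKey='dummy-api-key',
                 APISecret='dummy-api-secret',
                 gpt_url='ws://spark-api.xf-yun.com/v1.1/chat')
url = param.create_url()
query = parse_qs(urlparse(url).query)
# 'authorization' is the base64-wrapped api_key/hmac-sha256 header string,
# 'date' is the RFC 1123 timestamp that was signed, and 'host' must match
# the netloc of gpt_url for the server to accept the signature.
for key in ('authorization', 'date', 'host'):
    print(key, '->', query[key][0])

Because the date is part of the signed material, a URL is only valid near the time it was generated, which is why __call__ regenerates it on every request rather than caching one at construction time.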
bisheng_langchain/chat_models/interface/zhipuai.py
@@ -0,0 +1,81 @@
# import json

import zhipuai

from .types import ChatInput, ChatOutput, Choice, Message, Usage
from .utils import get_ts


class ChatCompletion(object):

    def __init__(self, api_key, **kwargs):
        zhipuai.api_key = api_key

    def __call__(self, inp: ChatInput, verbose=False):
        messages = inp.messages
        model = inp.model
        top_p = 0.7 if inp.top_p is None else inp.top_p
        temperature = 0.95 if inp.temperature is None else inp.temperature
        # stream = False if inp.stream is None else inp.stream
        # max_tokens = 1024 if inp.max_tokens is None else inp.max_tokens

        new_messages = []
        system_content = ''
        for m in messages:
            content = m.content
            role = m.role
            if role == 'system':
                system_content += content
                continue
            new_messages.append({'role': role, 'content': content})

        if system_content:
            new_messages[-1]['content'] = (system_content +
                                           new_messages[-1]['content'])

        created = get_ts()
        payload = {
            'model': model,
            'prompt': new_messages,
            'temperature': temperature,
            'top_p': top_p,
            'request_id': str(created),
            'incremental': False
        }

        if verbose:
            print('payload', payload)

        req_type = 'chat.completion'
        status_message = 'success'
        choices = []
        usage = None
        try:
            resp = zhipuai.model_api.invoke(**payload)
            status_code = resp['code']
            status_message = resp['msg']
            if status_code == 200:
                choices = []
                for index, choice in enumerate(resp['data']['choices']):
                    finish_reason = 'default'
                    msg = Message(**choice)
                    cho = Choice(index=index,
                                 message=msg,
                                 finish_reason=finish_reason)
                    choices.append(cho)
                usage = Usage(**resp['data']['usage'])

        except Exception as e:
            status_code = 400
            status_message = str(e)

        if status_code != 200:
            raise Exception(status_message)

        return ChatOutput(status_code=status_code,
                          status_message=status_message,
                          model=model,
                          object=req_type,
                          created=created,
                          choices=choices,
                          usage=usage)