bisheng-langchain 0.1.10.1__py3-none-any.whl → 0.2.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bisheng_langchain/chains/__init__.py +4 -1
- bisheng_langchain/chains/router/__init__.py +0 -0
- bisheng_langchain/chains/router/multi_rule.py +14 -0
- bisheng_langchain/chains/router/rule_router.py +47 -0
- bisheng_langchain/chat_models/host_llm.py +1 -1
- bisheng_langchain/chat_models/proxy_llm.py +7 -3
- bisheng_langchain/document_loaders/__init__.py +2 -0
- bisheng_langchain/document_loaders/custom_kv.py +166 -0
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +7 -22
- bisheng_langchain/document_loaders/universal_kv.py +10 -22
- bisheng_langchain/input_output/__init__.py +4 -0
- bisheng_langchain/input_output/input.py +34 -9
- bisheng_langchain/input_output/output.py +276 -0
- bisheng_langchain/utils/requests.py +24 -24
- bisheng_langchain/vectorstores/__init__.py +2 -1
- bisheng_langchain/vectorstores/milvus.py +0 -0
- bisheng_langchain/vectorstores/retriever.py +111 -0
- {bisheng_langchain-0.1.10.1.dist-info → bisheng_langchain-0.2.0rc0.dist-info}/METADATA +3 -6
- {bisheng_langchain-0.1.10.1.dist-info → bisheng_langchain-0.2.0rc0.dist-info}/RECORD +21 -15
- {bisheng_langchain-0.1.10.1.dist-info → bisheng_langchain-0.2.0rc0.dist-info}/WHEEL +1 -1
- {bisheng_langchain-0.1.10.1.dist-info → bisheng_langchain-0.2.0rc0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,9 @@
|
|
1
1
|
from bisheng_langchain.chains.autogen.auto_gen import AutoGenChain
|
2
2
|
from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
3
|
+
from bisheng_langchain.chains.router.multi_rule import MultiRuleChain
|
4
|
+
from bisheng_langchain.chains.router.rule_router import RuleBasedRouter
|
3
5
|
|
4
6
|
from .loader_output import LoaderOutputChain
|
5
7
|
|
6
|
-
__all__ = ['StuffDocumentsChain', 'LoaderOutputChain', 'AutoGenChain'
|
8
|
+
__all__ = ['StuffDocumentsChain', 'LoaderOutputChain', 'AutoGenChain',
|
9
|
+
'RuleBasedRouter', 'MultiRuleChain']
|
File without changes
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from typing import List, Mapping
|
2
|
+
|
3
|
+
from langchain.chains.router.base import Chain, MultiRouteChain, RouterChain
|
4
|
+
|
5
|
+
|
6
|
+
class MultiRuleChain(MultiRouteChain):
    """Multi-route chain whose output keys are supplied by configuration.

    Besides the router chain, the destination chains and the default chain,
    the model carries ``output_variables``; that list is what ``output_keys``
    reports.
    """

    router_chain: RouterChain
    destination_chains: Mapping[str, Chain]
    default_chain: Chain
    # Names returned verbatim by ``output_keys``.
    output_variables: List[str]

    @property
    def output_keys(self) -> List[str]:
        """Return the configured ``output_variables`` list unchanged."""
        configured_keys = self.output_variables
        return configured_keys
|
@@ -0,0 +1,47 @@
|
|
1
|
+
from typing import Any, Callable, Dict, List, Union
|
2
|
+
|
3
|
+
from langchain.callbacks.manager import Callbacks
|
4
|
+
from langchain.chains.router.base import Route, RouterChain
|
5
|
+
|
6
|
+
|
7
|
+
class RuleBasedRouter(RouterChain):
    """Router chain that delegates the routing decision to ``rule_function``.

    ``rule_function`` is called with the chain inputs. Its result is consumed
    as a mapping: the ``'destination'`` entry selects the route and the
    ``'next_inputs'`` entry is forwarded as the inputs of that route.
    """

    # The result is used as a mapping (``.get`` / ``[...]``), not as a str.
    rule_function: Callable[..., Any]
    input_variables: List[str]

    @property
    def input_keys(self):
        """Return the configured ``input_variables``."""
        return self.input_variables

    def _validate_outputs(self, outputs: Dict[str, Any]) -> None:
        """Run the parent validation, then require ``next_inputs`` to be a dict.

        Raises:
            ValueError: if ``outputs['next_inputs']`` is not a dict.
        """
        super()._validate_outputs(outputs)
        next_inputs = outputs['next_inputs']
        if not isinstance(next_inputs, dict):
            raise ValueError(
                f'next_inputs must be a dict, got {type(next_inputs).__name__}')

    @staticmethod
    def _build_route(result: Any, inputs: Any) -> Route:
        """Convert a ``rule_function`` result into a ``Route``.

        An empty or ``None`` result means "no decision": the route has no
        destination and the original ``inputs`` are passed through. Emptiness
        is tested before the result is dereferenced, so this case no longer
        raises. A non-empty result must still contain ``'next_inputs'``.
        """
        if not result:
            return Route(None, inputs)
        # A missing or falsy destination is normalised to None.
        destination = result.get('destination') or None
        return Route(destination, result['next_inputs'])

    def _call(
        self,
        inputs: Union[Dict[str, Any], Any],
    ) -> Route:
        """Apply the rule function to ``inputs`` and return the ``Route``.

        NOTE(review): this returns a ``Route`` rather than a dict keyed by the
        output keys -- confirm whether this router is ever invoked through
        ``__call__`` instead of ``route``/``aroute``.
        """
        return self._build_route(self.rule_function(inputs), inputs)

    def route(
        self,
        inputs: Union[Dict[str, Any], Any],
        callbacks: Callbacks = None,
    ) -> Route:
        """Apply the rule function to ``inputs`` and return the ``Route``.

        ``callbacks`` is accepted for interface compatibility and is not used.
        """
        return self._build_route(self.rule_function(inputs), inputs)

    async def aroute(
        self,
        inputs: Union[Dict[str, Any], Any],
        callbacks: Callbacks = None,
    ) -> Route:
        """Async variant of ``route``.

        Works with both plain and coroutine rule functions: the result is
        awaited only when it is awaitable. ``callbacks`` is not used.
        """
        import inspect

        result = self.rule_function(inputs)
        if inspect.isawaitable(result):
            result = await result
        return self._build_route(result, inputs)
|
@@ -194,10 +194,14 @@ class ProxyChatLLM(BaseChatModel):
|
|
194
194
|
'function_call': kwargs.get('function_call', None),
|
195
195
|
'functions': kwargs.get('functions', [])
|
196
196
|
}
|
197
|
-
response = self.client.post(self.elemai_base_url,
|
197
|
+
response = self.client.post(self.elemai_base_url, json=params)
|
198
198
|
return response.json()
|
199
199
|
|
200
|
-
|
200
|
+
rsp_dict = _completion_with_retry(**kwargs)
|
201
|
+
if 200 != rsp_dict.get('status_code'):
|
202
|
+
logger.error(f"proxy_llm_error resp={rsp_dict}")
|
203
|
+
raise Exception(rsp_dict)
|
204
|
+
return rsp_dict
|
201
205
|
|
202
206
|
def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict:
|
203
207
|
overall_token_usage: dict = {}
|
@@ -233,7 +237,7 @@ class ProxyChatLLM(BaseChatModel):
|
|
233
237
|
@retry_decorator
|
234
238
|
async def _acompletion_with_retry(**kwargs: Any) -> Any:
|
235
239
|
# Use OpenAI's async api https://github.com/openai/openai-python#async-api
|
236
|
-
async with self.client.apost(url=self.elemai_base_url,
|
240
|
+
async with self.client.apost(url=self.elemai_base_url, json=kwargs) as response:
|
237
241
|
async for txt in response.content.iter_any():
|
238
242
|
if b'\n' in txt:
|
239
243
|
for txt_ in txt.split(b'\n'):
|
@@ -1,3 +1,4 @@
|
|
1
|
+
from .custom_kv import CustomKVLoader
|
1
2
|
from .elem_pdf import PDFWithSemanticLoader
|
2
3
|
from .elem_unstrcutured_loader import ElemUnstructuredLoader, ElemUnstructuredLoaderV0
|
3
4
|
from .universal_kv import UniversalKVLoader
|
@@ -7,4 +8,5 @@ __all__ = [
|
|
7
8
|
'ElemUnstructuredLoader',
|
8
9
|
'ElemUnstructuredLoaderV0',
|
9
10
|
'UniversalKVLoader',
|
11
|
+
'CustomKVLoader',
|
10
12
|
]
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# flake8: noqa
|
2
|
+
"""Loads key-value extraction results for PDF or image files."""
|
3
|
+
import base64
|
4
|
+
import json
|
5
|
+
import logging
|
6
|
+
import os
|
7
|
+
import re
|
8
|
+
import tempfile
|
9
|
+
from pathlib import Path
|
10
|
+
from time import sleep
|
11
|
+
from typing import List, Optional, Tuple, Union
|
12
|
+
from urllib.parse import quote_plus, unquote, urlparse
|
13
|
+
|
14
|
+
import cv2
|
15
|
+
import fitz
|
16
|
+
import numpy as np
|
17
|
+
from bisheng_langchain.utils.requests import Requests
|
18
|
+
from langchain.docstore.document import Document
|
19
|
+
from langchain.document_loaders.base import BaseLoader
|
20
|
+
from PIL import Image
|
21
|
+
|
22
|
+
logger = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
def convert_base64(image):
    """Encode ``image`` as JPEG and return the bytes as a base64 ASCII string."""
    encode_result = cv2.imencode('.jpg', image)
    # Index 1 of the imencode result holds the encoded buffer.
    jpeg_bytes = encode_result[1].tobytes()
    b64_text = base64.b64encode(jpeg_bytes).decode('ascii')
    # Drop any newline characters from the encoded text.
    return b64_text.replace('\n', '')
|
28
|
+
|
29
|
+
|
30
|
+
def transpdf2png(pdf_file):
    """Render every page of a PDF file to an image array.

    Each page is rendered at increasing DPI (72, 144, 200) until its shorter
    side is at least 1600 pixels; the last rendering is used otherwise.

    Args:
        pdf_file: path of the PDF file to read.

    Returns:
        dict mapping ``'page_NNN'`` (zero-padded page number) to a numpy
        array with the channel axis reversed.
    """
    # Read through a context manager so the file handle is always released.
    with open(pdf_file, 'rb') as pdf_handle:
        pdf_bytes = pdf_handle.read()

    dpis = [72, 144, 200]
    pdf_images = dict()
    pdf = fitz.Document(stream=pdf_bytes, filetype='pdf')
    try:
        for page in pdf:
            pix = None
            for dpi in dpis:
                pix = page.get_pixmap(dpi=dpi)
                if min(pix.width, pix.height) >= 1600:
                    break

            mode = 'RGBA' if pix.alpha else 'RGB'
            img = Image.frombytes(mode, [pix.width, pix.height], pix.samples)
            # RGB to BGR.
            # NOTE(review): for an RGBA pixmap this reversal yields ABGR.
            img = np.array(img)[:, :, ::-1]
            img_name = 'page_{:03d}'.format(page.number)
            pdf_images[img_name] = img
    finally:
        # Release the document once all pages have been converted.
        pdf.close()

    return pdf_images
|
50
|
+
|
51
|
+
|
52
|
+
class CustomKVLoader(BaseLoader):
    """Extract key-value from pdf or image.

    The file is uploaded to a remote task endpoint, the task status is polled
    until it leaves the "running" state, and the entries of the result whose
    ``element_name`` is one of the requested schemas are returned as a single
    JSON document.
    """

    def __init__(self, file_path: str,
                 elm_api_base_url: str,
                 elm_api_key: str,
                 schemas: str,
                 elem_server_id: str,
                 task_type: str,
                 request_timeout: Optional[Union[float, Tuple[float, float]]] = 30) -> None:
        """Initialize with a file path.

        Args:
            file_path: local path (``~`` is expanded) or URL of the file.
            elm_api_base_url: base URL of the extraction service.
            elm_api_key: token sent as ``Authorization: Bearer <key>``.
            schemas: ``|``-separated element names to keep from the result.
            elem_server_id: sent as ``scene_id`` or ``logic_service_id``,
                depending on ``task_type``.
            task_type: ``'extraction-job'`` or ``'logic-job'``.
            request_timeout: timeout handed to the HTTP helper.

        Raises:
            ValueError: if a URL cannot be downloaded (non-200 status) or the
                path is neither an existing file nor a valid URL.
        """
        self.file_path = file_path
        self.elm_api_base_url = elm_api_base_url
        self.elm_api_key = elm_api_key
        self.elem_server_id = elem_server_id
        self.task_type = task_type
        self.schemas = set(schemas.split('|'))
        self.headers = {'Authorization': f'Bearer {elm_api_key}'}
        self.requests = Requests(headers=self.headers,
                                 request_timeout=request_timeout)
        if '~' in self.file_path:
            self.file_path = os.path.expanduser(self.file_path)

        # If the file is a web path, download it to a temporary file, and use that
        if not os.path.isfile(self.file_path) and self._is_valid_url(self.file_path):
            r = self.requests.get(self.file_path)

            if r.status_code != 200:
                raise ValueError(
                    'Check the url of your file; returned status code %s'
                    % r.status_code
                )

            # Kept on the instance so the directory outlives __init__.
            self.temp_dir = tempfile.TemporaryDirectory()
            temp_file = Path(self.temp_dir.name) / unquote(urlparse(self.file_path
                                                                    ).path.split('/')[-1])
            with open(temp_file, mode='wb') as f:
                f.write(r.content)
            self.file_path = str(temp_file)
        elif not os.path.isfile(self.file_path):
            raise ValueError('File path %s is not a valid file or url' % self.file_path)
        super().__init__()

    @staticmethod
    def _is_valid_url(url: str) -> bool:
        """Check if the url is valid."""
        parsed = urlparse(url)
        return bool(parsed.netloc) and bool(parsed.scheme)

    def load(self) -> List[Document]:
        """Upload the file, wait for the task and return the extracted values.

        Returns:
            A one-element list holding a ``Document`` whose content is the
            JSON dump of ``{element_name: [words, ...]}``; an empty list when
            no task id is returned or the task reports failure (status 3).

        Raises:
            ValueError: if ``task_type`` is not a supported value.
            Exception: if the task cannot be created (non-200 response) or the
                result cannot be fetched/parsed.
        """
        if self.task_type == 'extraction-job':
            url = self.elm_api_base_url + '/task'
            body = {'scene_id': self.elem_server_id}
        elif self.task_type == 'logic-job':
            url = self.elm_api_base_url + '/logic-job'
            body = {'logic_service_id': self.elem_server_id}
        else:
            # Fail with a clear message instead of an unbound-local error.
            raise ValueError(f'unsupported task_type: {self.task_type}')

        # Create the task; the upload handle is closed as soon as the request ends.
        with open(self.file_path, 'rb') as upload:
            resp = self.requests.post(url=url, json={}, data=body, files={'file': upload})

        if resp.status_code != 200:
            logger.error(f'custom_kv=create_task resp={resp.text}')
            raise Exception('custom_kv create task file')

        task_id = resp.json().get('data').get('task_id')
        if not task_id:
            logger.error(f'task_create_fail res={resp.text}')
            return []

        # Poll the status: 1 means keep waiting, 3 means failure, anything
        # else ends the wait.
        # NOTE(review): the loop has no upper bound on the number of polls.
        status_url = url + f'/status?task_id={task_id}'
        while True:
            status = self.requests.get(status_url).json()
            state = status.get('data').get('status')
            if state == 1:
                sleep(2)
            elif state == 3:
                logger.error(f'custom_kv type={self.task_type} resp={status}')
                return []
            else:
                break

        # Build the result URL.
        job_id = 'job_id' if self.task_type == 'logic-job' else 'task_id'
        match = re.match(r'^(?:https?:\/\/)?(?:www\.)?([^\/\n]+)', self.elm_api_base_url)
        detail_url = quote_plus(match.group()+f'/logic-job-detail/{task_id}')
        result_url = url + f'/result?{job_id}={task_id}&detail_url={detail_url}'

        # Only the "independent" elements are collected.
        result = {}
        document_result = {}
        try:
            # Fetched once; `result` keeps the whole response for the error log.
            result = self.requests.get(result_url).json()
            for file_result in result.get('data'):
                independent = file_result.get('result').get('independent_list')
                for element in independent:
                    element_name = element.get('element_name')
                    if element_name in self.schemas:
                        document_result[element_name] = [
                            el.get('words') for el in element.get('entity_list')]
        except Exception as e:
            logger.error(f'task_result_error scene_id={self.elem_server_id} res={result} except={str(e)}')
            raise Exception('custom_kv parse_error') from e

        content = json.dumps(document_result)
        doc = Document(page_content=content)
        return [doc]
|
@@ -1,31 +1,15 @@
|
|
1
1
|
# flake8: noqa
|
2
2
|
"""Loads PDF with semantic splitter."""
|
3
3
|
import base64
|
4
|
-
import io
|
5
|
-
import json
|
6
4
|
import logging
|
7
5
|
import os
|
8
|
-
import
|
9
|
-
|
10
|
-
import time
|
11
|
-
from abc import ABC
|
12
|
-
from collections import Counter
|
13
|
-
from copy import deepcopy
|
14
|
-
from pathlib import Path
|
15
|
-
from typing import Any, Iterator, List, Mapping, Optional, Union
|
16
|
-
from urllib.parse import urlparse
|
17
|
-
|
18
|
-
import fitz
|
19
|
-
import numpy as np
|
20
|
-
import pypdfium2
|
6
|
+
from typing import List
|
7
|
+
|
21
8
|
import requests
|
22
|
-
from bisheng_langchain.document_loaders.parsers import LayoutParser
|
23
9
|
from langchain.docstore.document import Document
|
24
|
-
from langchain.document_loaders.blob_loaders import Blob
|
25
10
|
from langchain.document_loaders.pdf import BasePDFLoader
|
26
|
-
from shapely import Polygon
|
27
|
-
from shapely import box as Rect
|
28
11
|
|
12
|
+
logger = logging.getLogger(__name__)
|
29
13
|
|
30
14
|
def merge_partitions(partitions):
|
31
15
|
text_elem_sep = '\n'
|
@@ -112,9 +96,7 @@ class ElemUnstructuredLoader(BasePDFLoader):
|
|
112
96
|
|
113
97
|
|
114
98
|
class ElemUnstructuredLoaderV0(BasePDFLoader):
|
115
|
-
"""
|
116
|
-
|
117
|
-
Loader also stores page numbers in metadata.
|
99
|
+
"""The appropriate parser is automatically selected based on the file format and OCR is supported
|
118
100
|
"""
|
119
101
|
def __init__(self,
|
120
102
|
file_name : str,
|
@@ -143,6 +125,9 @@ class ElemUnstructuredLoaderV0(BasePDFLoader):
|
|
143
125
|
headers=self.headers,
|
144
126
|
json=payload).json()
|
145
127
|
|
128
|
+
if 200!=resp.get('status_code'):
|
129
|
+
logger.info(f'not return resp={resp}')
|
130
|
+
|
146
131
|
page_content = resp['text']
|
147
132
|
meta = {'source': self.file_name}
|
148
133
|
doc = Document(page_content=page_content, metadata=meta)
|
@@ -1,30 +1,19 @@
|
|
1
1
|
# flake8: noqa
|
2
2
|
"""Loads PDF with semantic splitter."""
|
3
3
|
import base64
|
4
|
-
import io
|
5
4
|
import json
|
6
|
-
import logging
|
7
5
|
import os
|
8
|
-
import re
|
9
|
-
import tempfile
|
10
|
-
import time
|
11
|
-
import filetype
|
12
|
-
import cv2
|
13
6
|
from collections import defaultdict
|
14
|
-
from
|
15
|
-
from collections import Counter
|
16
|
-
from copy import deepcopy
|
17
|
-
from pathlib import Path
|
18
|
-
from typing import Any, Iterator, List, Mapping, Optional, Union
|
19
|
-
from urllib.parse import urlparse
|
20
|
-
from PIL import Image
|
7
|
+
from typing import List
|
21
8
|
|
9
|
+
import cv2
|
10
|
+
import filetype
|
22
11
|
import fitz
|
23
12
|
import numpy as np
|
24
|
-
import
|
13
|
+
from bisheng_langchain.document_loaders.parsers import ELLMClient
|
25
14
|
from langchain.docstore.document import Document
|
26
15
|
from langchain.document_loaders.base import BaseLoader
|
27
|
-
from
|
16
|
+
from PIL import Image
|
28
17
|
|
29
18
|
|
30
19
|
def convert_base64(image):
|
@@ -45,11 +34,11 @@ def transpdf2png(pdf_file):
|
|
45
34
|
pix = page.get_pixmap(dpi=dpi)
|
46
35
|
if min(pix.width, pix.height) >= 1600: break
|
47
36
|
|
48
|
-
mode =
|
37
|
+
mode = 'RGBA' if pix.alpha else 'RGB'
|
49
38
|
img = Image.frombytes(mode, [pix.width, pix.height], pix.samples)
|
50
39
|
# RGB to BGR
|
51
40
|
img = np.array(img)[:, :, ::-1]
|
52
|
-
img_name =
|
41
|
+
img_name = 'page_{:03d}'.format(page.number)
|
53
42
|
pdf_images[img_name] = img
|
54
43
|
|
55
44
|
return pdf_images
|
@@ -79,7 +68,7 @@ class UniversalKVLoader(BaseLoader):
|
|
79
68
|
elif mime_type.startswith('image'):
|
80
69
|
file_type = 'img'
|
81
70
|
else:
|
82
|
-
raise ValueError(f
|
71
|
+
raise ValueError(f'file type {file_type} is not support.')
|
83
72
|
|
84
73
|
if file_type == 'img':
|
85
74
|
bytes_data = open(self.file_path, 'rb').read()
|
@@ -90,7 +79,7 @@ class UniversalKVLoader(BaseLoader):
|
|
90
79
|
if 'code' in resp and resp['code'] == 200:
|
91
80
|
key_values = resp['result']['ellm_result']
|
92
81
|
else:
|
93
|
-
raise ValueError(f
|
82
|
+
raise ValueError(f'universal kv load failed: {resp}')
|
94
83
|
|
95
84
|
kv_results = defaultdict(list)
|
96
85
|
for key, value in key_values.items():
|
@@ -118,7 +107,7 @@ class UniversalKVLoader(BaseLoader):
|
|
118
107
|
if 'code' in resp and resp['code'] == 200:
|
119
108
|
key_values = resp['result']['ellm_result']
|
120
109
|
else:
|
121
|
-
raise ValueError(f
|
110
|
+
raise ValueError(f'universal kv load failed: {resp}')
|
122
111
|
|
123
112
|
for key, value in key_values.items():
|
124
113
|
kv_results[key].extend(value['text'])
|
@@ -128,4 +117,3 @@ class UniversalKVLoader(BaseLoader):
|
|
128
117
|
meta = {'source': file_name}
|
129
118
|
doc = Document(page_content=content, metadata=meta)
|
130
119
|
return [doc]
|
131
|
-
|
@@ -1,23 +1,48 @@
|
|
1
1
|
|
2
|
-
from typing import List
|
2
|
+
from typing import List, Optional
|
3
3
|
|
4
4
|
from pydantic import BaseModel, Extra
|
5
5
|
|
6
6
|
|
7
|
-
class
|
7
|
+
class InputNode(BaseModel):
    """Input component, used to control the input."""

    input: Optional[List[str]]

    def text(self):
        """Return the stored ``input`` value unchanged."""
        stored_input = self.input
        return stored_input
|
13
|
+
|
14
|
+
|
15
|
+
class VariableNode(BaseModel):
    """Component used to define variables as parallel name/value lists."""

    # keys
    variables: Optional[List[str]]
    # values
    variable_value: Optional[List[str]] = []

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def text(self):
        """Return a dict mapping each variable name to its value.

        Names and values are paired by position. An empty dict is returned
        when there are no values. Missing names (``variables`` unset) or
        surplus values no longer raise; values without a matching name are
        ignored.
        """
        if not self.variable_value:
            return {}
        names = self.variables or []
        return dict(zip(names, self.variable_value))
|
36
|
+
|
20
37
|
|
38
|
+
class InputFileNode(BaseModel):
    """Input file component holding a file's path, name and type hint."""

    file_path: Optional[str]
    file_name: Optional[str]
    file_type: Optional[str]  # tips for file

    def text(self):
        """Return ``[file_path, file_name]``, or ``''`` when no path is set."""
        # Without a path there is nothing to hand on.
        if not self.file_path:
            return ''
        return [self.file_path, self.file_name]
|
@@ -0,0 +1,276 @@
|
|
1
|
+
import json
|
2
|
+
from typing import Any, Dict, List, Optional
|
3
|
+
from venv import logger
|
4
|
+
|
5
|
+
from bisheng_langchain.chains import LoaderOutputChain
|
6
|
+
from langchain.callbacks.manager import AsyncCallbackManagerForChainRun, CallbackManagerForChainRun
|
7
|
+
from langchain.chains.base import Chain
|
8
|
+
from pydantic import BaseModel, Extra
|
9
|
+
|
10
|
+
# Colour name -> ANSI SGR parameter string.
_TEXT_COLOR_MAPPING = {
    'blue': '36;1',
    'yellow': '33;1',
    'pink': '38;5;200',
    'green': '32;1',
    'red': '31;1',
}


def get_color_mapping(
    items: List[str], excluded_colors: Optional[List] = None
) -> Dict[str, str]:
    """Get mapping for items to a support color.

    Colours are assigned round-robin in the order of ``_TEXT_COLOR_MAPPING``,
    skipping any name listed in ``excluded_colors``.

    Args:
        items: names that each need a colour.
        excluded_colors: colour names that must not be used.

    Returns:
        dict mapping every item to a colour name.

    Raises:
        ValueError: if ``items`` is non-empty but every colour is excluded
            (previously an unexplained ``ZeroDivisionError``).
    """
    colors = list(_TEXT_COLOR_MAPPING.keys())
    if excluded_colors is not None:
        colors = [c for c in colors if c not in excluded_colors]
    if items and not colors:
        raise ValueError('no colors left to assign: all supported colors are excluded')
    color_mapping = {item: colors[i % len(colors)] for i, item in enumerate(items)}
    return color_mapping
|
28
|
+
|
29
|
+
|
30
|
+
class Output(BaseModel):
    """Output component, used to control the output."""

    @classmethod
    def initialize(cls, file_path: str = None):
        """Return ``file_path`` when it is truthy, otherwise an empty string."""
        if file_path:
            return file_path
        return ''
|
36
|
+
|
37
|
+
|
38
|
+
class Report(Chain):
    """Chain that runs preset questions on child chains and collects the answers.

    Configuration read by this class:

    * ``chains``: list of dicts with ``object`` (the chain to run),
      ``node_id`` (prefix of the keys written to the output) and ``input``
      (the trigger question or questions).
    * ``variables``: list of dicts with ``node_id`` and ``input`` (a mapping
      of variable name to value); each entry is copied into the output under
      ``<node_id>_<name>``.
    """

    chains: Optional[List[Dict]]
    variables: Optional[List[Dict]]
    report_name: str
    stop_flag: bool = False

    input_key: str = 'report_name'  #: :meta private:
    output_key: str = 'text'  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""
        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Expect input key.
        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return output key.
        :meta private:
        """
        return [self.output_key]

    def validate_chains(cls, values: Dict) -> Dict:
        """Validate chains.

        Raises ``ValueError`` when a configured chain object does not expose
        exactly one output key.

        NOTE(review): no validator decorator is applied here, so this is a
        plain method -- confirm whether it was meant to run on construction.
        """
        if values.get('chains'):
            for chain in values['chains']:
                chain_output_keys = chain['object'].output_keys
                if len(chain_output_keys) != 1:
                    raise ValueError(
                        'Chain used in Report should all have one output, got '
                        f"{chain['object']} with {len(chain_output_keys)} outputs."
                    )
        return values

    @staticmethod
    def _store_result(inputs: Dict[str, Any],
                      outputs: Dict[str, Any],
                      chain: Chain,
                      node_id: str,
                      result: Any) -> Any:
        """Write ``result`` into ``outputs`` and return the text to log.

        For a ``LoaderOutputChain`` the result is parsed as JSON and every
        requested key with a truthy value is stored as ``<node_id>_<key>``
        (lists are joined with ``;``). For any other chain the raw result is
        stored under ``node_id``.
        """
        if not isinstance(chain, LoaderOutputChain):
            outputs.update({node_id: result})
            return result
        if not result:
            # Nothing to parse (e.g. both async attempts failed and left '').
            return result
        for schema in inputs.values():
            result = json.loads(result)
            for key in schema:
                value = result.get(key)
                if value:
                    result_str = (';'.join([str(x) for x in value])
                                  if isinstance(value, list) else value)
                    outputs.update({node_id+'_'+key: result_str})
            result = json.dumps(result, ensure_ascii=False)
        return result

    def _collect_variables(self) -> Dict[str, Any]:
        """Return the configured variables keyed as ``<node_id>_<name>``."""
        collected = {}
        if self.variables and self.variables[0]:
            for variable in self.variables:
                for k, v in variable['input'].items():
                    collected.update({variable['node_id']+'_'+k: v})
        return collected

    @staticmethod
    def _iter_questions(chain: Dict):
        """Yield ``(inputs, node_id)`` for every preset question of ``chain``.

        A string value is asked once with the whole preset input; any other
        value is treated as a collection and each item is asked on its own.
        """
        preset_question = chain['input']
        for k, v in preset_question.items():
            if isinstance(v, str):
                yield preset_question, chain['node_id']+'_'+v
            else:
                for question in v:
                    yield {k: question}, chain['node_id']+'_'+question

    def func_call(self,
                  inputs: Dict[str, Any],
                  outputs: Dict[str, Any],
                  intermedia_stop: list,
                  chain: Chain,
                  node_id: str,
                  run_manager: Optional[CallbackManagerForChainRun] = None,):
        """Run ``chain`` on ``inputs`` and record the question and answer.

        The question/answer pair is appended to ``intermedia_stop``, the
        answer is written into ``outputs`` and both are reported through the
        run manager's ``on_text``.
        """
        question = list(inputs.values())[0]
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()

        if isinstance(chain, LoaderOutputChain):
            question = 'Get' + ','.join(question)
        _run_manager.on_text(text='', log='', type='start', category='question')
        _run_manager.on_text(text='', log=question, type='end', category='question')
        _run_manager.on_text(text='', log='', type='start', category='answer')
        intermedia_stop.append({'log': question, 'category': 'question'})

        chain_outputs = chain(inputs, callbacks=_run_manager.get_child())
        result = (chain_outputs.get(chain.output_keys[0])
                  if isinstance(chain_outputs, dict) else chain_outputs)
        result = self._store_result(inputs, outputs, chain, node_id, result)
        intermedia_stop.append({'log': result, 'category': 'answer'})
        _run_manager.on_text(text='', log=result, type='end', category='answer')

    async def func_acall(self,
                         inputs: Dict[str, Any],
                         outputs: Dict[str, Any],
                         intermedia_stop: list,
                         chain: Chain,
                         node_id: str,
                         run_manager: Optional[AsyncCallbackManagerForChainRun] = None,):
        """Async counterpart of ``func_call``.

        The chain is first run with ``arun``; on failure the error is logged
        and reported and the chain is retried synchronously. If that fails as
        well the answer is an empty string.
        """
        question = list(inputs.values())[0]
        _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()

        if isinstance(chain, LoaderOutputChain):
            question = 'Get' + ','.join(question)
        await _run_manager.on_text(text='', log='', type='start', category='question')
        await _run_manager.on_text(text='', log=question, type='end', category='question')
        await _run_manager.on_text(text='', log='', type='start', category='answer')
        intermedia_stop.append({'log': question, 'category': 'question'})

        # process
        try:
            chain_outputs = await chain.arun(inputs, callbacks=_run_manager.get_child())
        except Exception as e:
            # NOTE(review): `logger` is imported from `venv` at the top of
            # this file -- confirm that is the intended logger.
            logger.exception(e)
            await _run_manager.on_text(text='', log=str(e), type='stream', category='processing')
            try:
                chain_outputs = chain(inputs)
            except Exception as e2:
                logger.exception(e2)
                await _run_manager.on_text(text='', log=str(e2), type='stream', category='processing')
                chain_outputs = ''

        result = (chain_outputs.get(chain.output_keys[0])
                  if isinstance(chain_outputs, dict) else chain_outputs)
        result = self._store_result(inputs, outputs, chain, node_id, result)
        intermedia_stop.append({'log': result, 'category': 'answer'})
        await _run_manager.on_text(text='', log=result, type='end', category='answer')

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
        verbose: Optional[bool] = None,
    ) -> Dict[str, str]:
        """Run every configured chain synchronously and collect the answers.

        Returns a dict with the collected outputs under ``output_key``, the
        report name under ``input_key`` and the question/answer log under
        ``'intermediate_steps'``.

        Raises:
            TypeError: if a configured ``object`` is not a ``Chain``.
        """
        intermedia_steps = []
        self.stop_flag = False
        outputs = self._collect_variables()

        for chain in self.chains or []:
            if 'node_id' not in chain:
                logger.info(f"report_skip_nonsence_chain chain={chain['object']}")
                continue
            if self.stop_flag:
                # Honour stop() the same way the async path does.
                break
            if not isinstance(chain['object'], Chain):
                raise TypeError(
                    f"{chain['object']} not be runnable Chain object"
                )
            if isinstance(chain['object'], LoaderOutputChain):
                # Loader chains receive the whole input in a single call.
                self.func_call(chain['input'], outputs, intermedia_steps,
                               chain['object'], chain['node_id'], run_manager)
                continue

            for question_inputs, node_id in self._iter_questions(chain):
                self.func_call(question_inputs, outputs, intermedia_steps,
                               chain['object'], node_id, run_manager)

        return {self.output_key: outputs, self.input_key: self.report_name,
                'intermediate_steps': intermedia_steps}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
        verbose: Optional[bool] = None,
    ) -> Dict[str, Any]:
        """Async counterpart of ``_call``; stops early once ``stop()`` is called.

        Raises:
            TypeError: if a configured ``object`` is not a ``Chain``.
        """
        # Fall back to a no-op manager so a missing run_manager cannot crash.
        _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
        intermedia_steps = []
        await _run_manager.on_text(text='', log='', type='end', category='processing')  # end father start
        self.stop_flag = False
        outputs = self._collect_variables()

        for chain in self.chains or []:
            if 'node_id' not in chain:
                logger.info(f"report_skip_nonsence_chain chain={chain['object']}")
                continue
            if self.stop_flag:
                break
            if not isinstance(chain['object'], Chain):
                raise TypeError(
                    f"{chain['object']} not be runnable Chain object"
                )
            if isinstance(chain['object'], LoaderOutputChain):
                # Loader chains receive the whole input in a single call.
                await self.func_acall(chain['input'], outputs, intermedia_steps,
                                      chain['object'], chain['node_id'], _run_manager)
                continue

            # normal chain
            for question_inputs, node_id in self._iter_questions(chain):
                await self.func_acall(question_inputs, outputs, intermedia_steps,
                                      chain['object'], node_id, _run_manager)

        # keep whole process paired
        await _run_manager.on_text(text='', log='', type='start', category='processing')
        return {self.output_key: outputs, self.input_key: self.report_name,
                'intermediate_steps': intermedia_steps}

    def stop(self):
        """Request that the chain loop stops before the next chain."""
        self.stop_flag = True

    def stop_status(self):
        """Return the current value of the stop flag."""
        return self.stop_flag
|
@@ -33,28 +33,28 @@ class Requests(BaseModel):
|
|
33
33
|
timeout=self.request_timeout,
|
34
34
|
**kwargs)
|
35
35
|
|
36
|
-
def post(self, url: str,
|
36
|
+
def post(self, url: str, json: Dict[str, Any], **kwargs: Any) -> requests.Response:
|
37
37
|
"""POST to the URL and return the text."""
|
38
38
|
return requests.post(url,
|
39
|
-
json=
|
39
|
+
json=json,
|
40
40
|
headers=self.headers,
|
41
41
|
auth=self.auth,
|
42
42
|
timeout=self.request_timeout,
|
43
43
|
**kwargs)
|
44
44
|
|
45
|
-
def patch(self, url: str,
|
45
|
+
def patch(self, url: str, json: Dict[str, Any], **kwargs: Any) -> requests.Response:
|
46
46
|
"""PATCH the URL and return the text."""
|
47
47
|
return requests.patch(url,
|
48
|
-
json=
|
48
|
+
json=json,
|
49
49
|
headers=self.headers,
|
50
50
|
auth=self.auth,
|
51
51
|
timeout=self.request_timeout,
|
52
52
|
**kwargs)
|
53
53
|
|
54
|
-
def put(self, url: str,
|
54
|
+
def put(self, url: str, json: Dict[str, Any], **kwargs: Any) -> requests.Response:
|
55
55
|
"""PUT the URL and return the text."""
|
56
56
|
return requests.put(url,
|
57
|
-
json=
|
57
|
+
json=json,
|
58
58
|
headers=self.headers,
|
59
59
|
auth=self.auth,
|
60
60
|
timeout=self.request_timeout,
|
@@ -98,24 +98,24 @@ class Requests(BaseModel):
|
|
98
98
|
yield response
|
99
99
|
|
100
100
|
@asynccontextmanager
|
101
|
-
async def apost(self, url: str,
|
101
|
+
async def apost(self, url: str, json: Dict[str, Any],
|
102
102
|
**kwargs: Any) -> AsyncGenerator[aiohttp.ClientResponse, None]:
|
103
103
|
"""POST to the URL and return the text asynchronously."""
|
104
|
-
async with self._arequest('POST', url, json=
|
104
|
+
async with self._arequest('POST', url, json=json, auth=self.auth, **kwargs) as response:
|
105
105
|
yield response
|
106
106
|
|
107
107
|
@asynccontextmanager
|
108
|
-
async def apatch(self, url: str,
|
108
|
+
async def apatch(self, url: str, json: Dict[str, Any],
|
109
109
|
**kwargs: Any) -> AsyncGenerator[aiohttp.ClientResponse, None]:
|
110
110
|
"""PATCH the URL and return the text asynchronously."""
|
111
|
-
async with self._arequest('PATCH', url, json=
|
111
|
+
async with self._arequest('PATCH', url, json=json, auth=self.auth, **kwargs) as response:
|
112
112
|
yield response
|
113
113
|
|
114
114
|
@asynccontextmanager
|
115
|
-
async def aput(self, url: str,
|
115
|
+
async def aput(self, url: str, json: Dict[str, Any],
|
116
116
|
**kwargs: Any) -> AsyncGenerator[aiohttp.ClientResponse, None]:
|
117
117
|
"""PUT the URL and return the text asynchronously."""
|
118
|
-
async with self._arequest('PUT', url, json=
|
118
|
+
async with self._arequest('PUT', url, json=json, auth=self.auth, **kwargs) as response:
|
119
119
|
yield response
|
120
120
|
|
121
121
|
@asynccontextmanager
|
@@ -150,17 +150,17 @@ class TextRequestsWrapper(BaseModel):
|
|
150
150
|
"""GET the URL and return the text."""
|
151
151
|
return self.requests.get(url, **kwargs).text
|
152
152
|
|
153
|
-
def post(self, url: str,
|
153
|
+
def post(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
|
154
154
|
"""POST to the URL and return the text."""
|
155
|
-
return self.requests.post(url,
|
155
|
+
return self.requests.post(url, json, **kwargs).text
|
156
156
|
|
157
|
-
def patch(self, url: str,
|
157
|
+
def patch(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
|
158
158
|
"""PATCH the URL and return the text."""
|
159
|
-
return self.requests.patch(url,
|
159
|
+
return self.requests.patch(url, json, **kwargs).text
|
160
160
|
|
161
|
-
def put(self, url: str,
|
161
|
+
def put(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
|
162
162
|
"""PUT the URL and return the text."""
|
163
|
-
return self.requests.put(url,
|
163
|
+
return self.requests.put(url, json, **kwargs).text
|
164
164
|
|
165
165
|
def delete(self, url: str, **kwargs: Any) -> str:
|
166
166
|
"""DELETE the URL and return the text."""
|
@@ -171,19 +171,19 @@ class TextRequestsWrapper(BaseModel):
|
|
171
171
|
async with self.requests.aget(url, **kwargs) as response:
|
172
172
|
return await response.text()
|
173
173
|
|
174
|
-
async def apost(self, url: str,
|
174
|
+
async def apost(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
|
175
175
|
"""POST to the URL and return the text asynchronously."""
|
176
|
-
async with self.requests.apost(url,
|
176
|
+
async with self.requests.apost(url, json, **kwargs) as response:
|
177
177
|
return await response.text()
|
178
178
|
|
179
|
-
async def apatch(self, url: str,
|
179
|
+
async def apatch(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
|
180
180
|
"""PATCH the URL and return the text asynchronously."""
|
181
|
-
async with self.requests.apatch(url,
|
181
|
+
async with self.requests.apatch(url, json, **kwargs) as response:
|
182
182
|
return await response.text()
|
183
183
|
|
184
|
-
async def aput(self, url: str,
|
184
|
+
async def aput(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
|
185
185
|
"""PUT the URL and return the text asynchronously."""
|
186
|
-
async with self.requests.aput(url,
|
186
|
+
async with self.requests.aput(url, json, **kwargs) as response:
|
187
187
|
return await response.text()
|
188
188
|
|
189
189
|
async def adelete(self, url: str, **kwargs: Any) -> str:
|
File without changes
|
@@ -0,0 +1,111 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING, ClassVar, Collection, Dict, List
|
4
|
+
from venv import logger
|
5
|
+
|
6
|
+
import requests
|
7
|
+
from langchain.schema.document import Document
|
8
|
+
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
9
|
+
from pydantic import Field, root_validator
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from langchain.callbacks.manager import (
|
13
|
+
AsyncCallbackManagerForRetrieverRun,
|
14
|
+
CallbackManagerForRetrieverRun,
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
class VectorStoreFilterRetriever(VectorStoreRetriever):
|
19
|
+
vectorstore: VectorStore
|
20
|
+
search_type: str = 'similarity'
|
21
|
+
search_kwargs: dict = Field(default_factory=dict)
|
22
|
+
allowed_search_types: ClassVar[Collection[str]] = (
|
23
|
+
'similarity',
|
24
|
+
'similarity_score_threshold',
|
25
|
+
'mmr',
|
26
|
+
)
|
27
|
+
access_url: str = None
|
28
|
+
|
29
|
+
class Config:
|
30
|
+
"""Configuration for this pydantic object."""
|
31
|
+
|
32
|
+
arbitrary_types_allowed = True
|
33
|
+
|
34
|
+
@root_validator()
|
35
|
+
def validate_search_type(cls, values: Dict) -> Dict:
|
36
|
+
"""Validate search type."""
|
37
|
+
search_type = values['search_type']
|
38
|
+
if search_type not in cls.allowed_search_types:
|
39
|
+
raise ValueError(
|
40
|
+
f'search_type of {search_type} not allowed. Valid values are: '
|
41
|
+
f'{cls.allowed_search_types}'
|
42
|
+
)
|
43
|
+
if search_type == 'similarity_score_threshold':
|
44
|
+
score_threshold = values['search_kwargs'].get('score_threshold')
|
45
|
+
if (score_threshold is None) or (not isinstance(score_threshold, float)):
|
46
|
+
raise ValueError(
|
47
|
+
'`score_threshold` is not specified with a float value(0~1) '
|
48
|
+
'in `search_kwargs`.'
|
49
|
+
)
|
50
|
+
return values
|
51
|
+
|
52
|
+
def _get_relevant_documents(
|
53
|
+
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
54
|
+
) -> List[Document]:
|
55
|
+
if self.search_type == 'similarity':
|
56
|
+
docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
|
57
|
+
elif self.search_type == 'similarity_score_threshold':
|
58
|
+
docs_and_similarities = (
|
59
|
+
self.vectorstore.similarity_search_with_relevance_scores(
|
60
|
+
query, **self.search_kwargs
|
61
|
+
)
|
62
|
+
)
|
63
|
+
docs = [doc for doc, _ in docs_and_similarities]
|
64
|
+
elif self.search_type == 'mmr':
|
65
|
+
docs = self.vectorstore.max_marginal_relevance_search(
|
66
|
+
query, **self.search_kwargs
|
67
|
+
)
|
68
|
+
else:
|
69
|
+
raise ValueError(f'search_type of {self.search_type} not allowed.')
|
70
|
+
|
71
|
+
return self.get_file_access(docs)
|
72
|
+
|
73
|
+
async def _aget_relevant_documents(
|
74
|
+
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
|
75
|
+
) -> List[Document]:
|
76
|
+
if self.search_type == 'similarity':
|
77
|
+
docs = await self.vectorstore.asimilarity_search(
|
78
|
+
query, **self.search_kwargs
|
79
|
+
)
|
80
|
+
elif self.search_type == 'similarity_score_threshold':
|
81
|
+
docs_and_similarities = (
|
82
|
+
await self.vectorstore.asimilarity_search_with_relevance_scores(
|
83
|
+
query, **self.search_kwargs
|
84
|
+
)
|
85
|
+
)
|
86
|
+
docs = [doc for doc, _ in docs_and_similarities]
|
87
|
+
elif self.search_type == 'mmr':
|
88
|
+
docs = await self.vectorstore.amax_marginal_relevance_search(
|
89
|
+
query, **self.search_kwargs
|
90
|
+
)
|
91
|
+
else:
|
92
|
+
raise ValueError(f'search_type of {self.search_type} not allowed.')
|
93
|
+
return self.get_file_access(docs)
|
94
|
+
|
95
|
+
def get_file_access(self, docs: List[Document]):
|
96
|
+
file_ids = [doc.metadata.get('file_id') for doc in docs]
|
97
|
+
if file_ids:
|
98
|
+
res = requests.get(self.access_url, json=file_ids)
|
99
|
+
if res.status_code == 200:
|
100
|
+
doc_res = res.json().get('data')
|
101
|
+
doc_right = {doc.get('docid') for doc in doc_res if doc.get('result') == 1}
|
102
|
+
for doc in docs:
|
103
|
+
if doc.metadata.get('file_id') not in doc_right:
|
104
|
+
doc.page_content = ''
|
105
|
+
doc.metadata['right'] = False
|
106
|
+
return docs
|
107
|
+
else:
|
108
|
+
logger.error(f'query_file_access_fail url={self.access_url} res={res.text}')
|
109
|
+
return [Document(page_content='', metadata={})]
|
110
|
+
else:
|
111
|
+
return docs
|
@@ -1,12 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: bisheng-langchain
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.2.0rc0
|
4
4
|
Summary: bisheng langchain modules
|
5
5
|
Home-page: https://github.com/dataelement/bisheng
|
6
6
|
Author: DataElem
|
7
7
|
Author-email: contact@dataelem.com
|
8
8
|
License: Apache 2.0
|
9
|
-
Platform: UNKNOWN
|
10
9
|
Classifier: Programming Language :: Python :: 3
|
11
10
|
Classifier: Programming Language :: Python :: 3.6
|
12
11
|
Classifier: Programming Language :: Python :: 3.7
|
@@ -20,8 +19,8 @@ Requires-Dist: langchain
|
|
20
19
|
Requires-Dist: zhipuai
|
21
20
|
Requires-Dist: websocket-client
|
22
21
|
Requires-Dist: elasticsearch
|
23
|
-
Requires-Dist: opencv-python ==4.5.5.64
|
24
|
-
Requires-Dist: Pillow ==9.5.0
|
22
|
+
Requires-Dist: opencv-python (==4.5.5.64)
|
23
|
+
Requires-Dist: Pillow (==9.5.0)
|
25
24
|
Requires-Dist: bisheng-pyautogen
|
26
25
|
|
27
26
|
## What is bisheng-langchain?
|
@@ -65,5 +64,3 @@ check out [bisheng-langchain Dev Docs](https://dataelem.feishu.cn/wiki/Xaq8wKQjk
|
|
65
64
|
bisheng-langchain adopts dependencies from the following:
|
66
65
|
|
67
66
|
- Thanks to [langchain](https://github.com/langchain-ai/langchain) for the main framework.
|
68
|
-
|
69
|
-
|
@@ -5,17 +5,20 @@ bisheng_langchain/autogen_role/assistant.py,sha256=6gA36zaolvV0MA35kPbdkbdsmmRiH
|
|
5
5
|
bisheng_langchain/autogen_role/custom.py,sha256=WiHK0vTIP0vMDhtKPNR4-A950i0qwLlowJO0v-g0d40,2423
|
6
6
|
bisheng_langchain/autogen_role/groupchat_manager.py,sha256=L53RmXyv4QMVuw1zGoXRe8hB2s39DVUEb_zASWoO_yI,1746
|
7
7
|
bisheng_langchain/autogen_role/user.py,sha256=Hwp2bu5_GJPMjZJnMLePxBtl7nizOCW0KtKPyu49DQ0,4953
|
8
|
-
bisheng_langchain/chains/__init__.py,sha256=
|
8
|
+
bisheng_langchain/chains/__init__.py,sha256=zd1FQGN0G5b_52oibBbdHsny0Y4T36YQnj6j1tRd4zQ,456
|
9
9
|
bisheng_langchain/chains/loader_output.py,sha256=02ZercAFaudStTZ4t7mcVkGRj5pD78HZ6NO8HbmbDH8,1903
|
10
10
|
bisheng_langchain/chains/autogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
bisheng_langchain/chains/autogen/auto_gen.py,sha256=QIkfCO9-VN2wRkl3_TWVj-JkdL2dqMQNy93j3uB401s,3270
|
12
12
|
bisheng_langchain/chains/combine_documents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
bisheng_langchain/chains/combine_documents/stuff.py,sha256=z_E_wfhJrAYWcNVRPomPm5fGRDI3hqoC52wcMzgzxVA,2369
|
14
14
|
bisheng_langchain/chains/question_answering/__init__.py,sha256=RWbSgTQ0IqZhrXkhaJUKzEXurA9NJE7_6P0zLy0IBjs,8636
|
15
|
+
bisheng_langchain/chains/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
+
bisheng_langchain/chains/router/multi_rule.py,sha256=BiFryj3-7rOxfttD-MyOkKWLCSGB9LVYd2rjOsIfQC8,375
|
17
|
+
bisheng_langchain/chains/router/rule_router.py,sha256=QWLKqJ9ZCQb9E3oh6pd0C6YZRhHyoJSmDFkx31LNpME,1563
|
15
18
|
bisheng_langchain/chat_models/__init__.py,sha256=FBNhc-zwFtPmWdNlt0QgzgOx2hVT-He1yOzgKZEBYQE,463
|
16
|
-
bisheng_langchain/chat_models/host_llm.py,sha256=
|
19
|
+
bisheng_langchain/chat_models/host_llm.py,sha256=UV-u3XaIQsw1Qz8XCJuADxWd7m8Xpey5fQdnOtEpIgo,16993
|
17
20
|
bisheng_langchain/chat_models/minimax.py,sha256=VFq1U4Ax1ZPFrsxzxilRE6NXvUH0HEoK1vDq5kXADTU,13963
|
18
|
-
bisheng_langchain/chat_models/proxy_llm.py,sha256=
|
21
|
+
bisheng_langchain/chat_models/proxy_llm.py,sha256=JdxQq7QJjHwmuZ7QsTaZawkWzDESZohKstiwMoqrPDY,16637
|
19
22
|
bisheng_langchain/chat_models/wenxin.py,sha256=GRhFFqToVpYwbHz9NQrH4lvVlOQ_9rtQEl6uikgNH_w,13793
|
20
23
|
bisheng_langchain/chat_models/xunfeiai.py,sha256=GP30oqpbb8UAxrkJ2l0anDq67Q40CUNulg9nu7PtQZc,14019
|
21
24
|
bisheng_langchain/chat_models/zhipuai.py,sha256=v29bsAZoe_5KR2LQVoKE7429U4Qc3FqcZIgAm8p2aX4,14941
|
@@ -27,12 +30,13 @@ bisheng_langchain/chat_models/interface/utils.py,sha256=qww_uYsWDqK7cLuv-KzZmmlg
|
|
27
30
|
bisheng_langchain/chat_models/interface/wenxin.py,sha256=z_K1Nj78dDYYgiVIzc5sGkOiGr8OAoRwaKwmpWXssH0,4246
|
28
31
|
bisheng_langchain/chat_models/interface/xunfei.py,sha256=DPHAZM_uHg0A8GnebgkRbLENhBW7bBtRHzKC0gFKZgc,7514
|
29
32
|
bisheng_langchain/chat_models/interface/zhipuai.py,sha256=67Ej6DRk-IlXUfEZPg-pjcYPyqZb32ClrBP-9k-3EEs,2636
|
30
|
-
bisheng_langchain/document_loaders/__init__.py,sha256=
|
33
|
+
bisheng_langchain/document_loaders/__init__.py,sha256=LuQ-zMYxde2FeiEcvVtjQqnHozki5pF_pDDa88_fBdg,366
|
34
|
+
bisheng_langchain/document_loaders/custom_kv.py,sha256=1xGir73LcH6lfIyzy4HX-Rg5bjbd_MWJPpqKDkf29CI,6623
|
31
35
|
bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
36
|
bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
37
|
bisheng_langchain/document_loaders/elem_pdf.py,sha256=64kUITkrTVJe9CH6IAVSdDVcn2Ekx2PM-jT0cdClXlo,22716
|
34
|
-
bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=
|
35
|
-
bisheng_langchain/document_loaders/universal_kv.py,sha256=
|
38
|
+
bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=qLqDTYA7IRguI0OiQ6PY3HzQCiQnc76Qy3_QHf90BMs,4616
|
39
|
+
bisheng_langchain/document_loaders/universal_kv.py,sha256=dJF_GQGKBMUjB_kX9CSp7xZRhXgwVuGPbMIzJwPh-C0,4063
|
36
40
|
bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
|
37
41
|
bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=B4Dea8xXXnGvB9j2OXv53HILNUmnWeNJz9ssNM-2fLM,1760
|
38
42
|
bisheng_langchain/document_loaders/parsers/image.py,sha256=7Vx4dD_WiSTojS4TMIJFxfE8nvze0kwNnwTd6f1cLds,938
|
@@ -44,16 +48,18 @@ bisheng_langchain/embeddings/wenxin.py,sha256=8kYqWuHydx5Cylb_Lmdti0YLHrOM1Qha3e
|
|
44
48
|
bisheng_langchain/embeddings/interface/__init__.py,sha256=GNY3tibpRxpAdAfSvQmXBKo0xKSLke_9y4clofi_WOE,98
|
45
49
|
bisheng_langchain/embeddings/interface/types.py,sha256=VdurbtsnjCPdlOjPFcK2Mg6r9bJYYHb3tepvkk-y3nM,461
|
46
50
|
bisheng_langchain/embeddings/interface/wenxin.py,sha256=5d9gI4enmfkD80s0FHKiDt33O0mwM8Xc5WTubnMUy8c,3104
|
47
|
-
bisheng_langchain/input_output/__init__.py,sha256=
|
48
|
-
bisheng_langchain/input_output/input.py,sha256=
|
49
|
-
bisheng_langchain/input_output/output.py,sha256=
|
51
|
+
bisheng_langchain/input_output/__init__.py,sha256=sW_GB7MlrHYsqY1Meb_LeimQqNsMz1gH-00Tqb2BUyM,153
|
52
|
+
bisheng_langchain/input_output/input.py,sha256=I5YDmgbvvj1o2lO9wi8LE37wM0wP5jkhUREU32YrZMQ,1094
|
53
|
+
bisheng_langchain/input_output/output.py,sha256=6U-az6-Cwz665C2YmcH3SYctWVjPFjmW8s70CA_qphk,11585
|
50
54
|
bisheng_langchain/retrievers/__init__.py,sha256=TcyK31IMgFJcYaOCLd9O6qFzXt1VMbtLs-g4C6ml_3w,117
|
51
55
|
bisheng_langchain/retrievers/mix_es_vector.py,sha256=-V1IGQJZMar2a35FrXf5MaHV_R8TpYdF8r1jIR-JDjA,3973
|
52
56
|
bisheng_langchain/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
|
-
bisheng_langchain/utils/requests.py,sha256=
|
54
|
-
bisheng_langchain/vectorstores/__init__.py,sha256=
|
57
|
+
bisheng_langchain/utils/requests.py,sha256=abZvV6-p3cyJpFtFzcvzCUGRYh17Bjdy2BRElG7zPL4,8397
|
58
|
+
bisheng_langchain/vectorstores/__init__.py,sha256=3xXjqJj_DwgQcQUC1MpPrYYhSKeg4yk67OtzmVAjco0,176
|
55
59
|
bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=UU3yJoSSH-prBJtoWQR8dYQzrSOLSvVBRdU45QtE7KA,11648
|
56
|
-
bisheng_langchain
|
57
|
-
bisheng_langchain
|
58
|
-
bisheng_langchain-0.
|
59
|
-
bisheng_langchain-0.
|
60
|
+
bisheng_langchain/vectorstores/milvus.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
61
|
+
bisheng_langchain/vectorstores/retriever.py,sha256=wq__1xRN3PfAgU8Kh8Am8iEsUkkVuJnhWTY6GFDAADg,4365
|
62
|
+
bisheng_langchain-0.2.0rc0.dist-info/METADATA,sha256=6KiZWWBaZ9hv4pkDlbXM-62q5gA65O3onYng4EHl4Ek,2125
|
63
|
+
bisheng_langchain-0.2.0rc0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
64
|
+
bisheng_langchain-0.2.0rc0.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
|
65
|
+
bisheng_langchain-0.2.0rc0.dist-info/RECORD,,
|
File without changes
|