bisheng-langchain 0.1.10.1__py3-none-any.whl → 0.2.0rc0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects each version as it appears in its public registry.

--- a/bisheng_langchain/chains/__init__.py
+++ b/bisheng_langchain/chains/__init__.py
@@ -1,6 +1,9 @@
  from bisheng_langchain.chains.autogen.auto_gen import AutoGenChain
  from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain
+ from bisheng_langchain.chains.router.multi_rule import MultiRuleChain
+ from bisheng_langchain.chains.router.rule_router import RuleBasedRouter

  from .loader_output import LoaderOutputChain

- __all__ = ['StuffDocumentsChain', 'LoaderOutputChain', 'AutoGenChain']
+ __all__ = ['StuffDocumentsChain', 'LoaderOutputChain', 'AutoGenChain',
+            'RuleBasedRouter', 'MultiRuleChain']

bisheng_langchain/chains/router/__init__.py — added as an empty file (no content to diff; inferred from the RECORD entries below)

--- /dev/null
+++ b/bisheng_langchain/chains/router/multi_rule.py
@@ -0,0 +1,14 @@
+ from typing import List, Mapping
+
+ from langchain.chains.router.base import Chain, MultiRouteChain, RouterChain
+
+
+ class MultiRuleChain(MultiRouteChain):
+     router_chain: RouterChain
+     destination_chains: Mapping[str, Chain]
+     default_chain: Chain
+     output_variables: List[str]
+
+     @property
+     def output_keys(self) -> List[str]:
+         return self.output_variables

--- /dev/null
+++ b/bisheng_langchain/chains/router/rule_router.py
@@ -0,0 +1,47 @@
+ from typing import Any, Callable, Dict, List, Union
+
+ from langchain.callbacks.manager import Callbacks
+ from langchain.chains.router.base import Route, RouterChain
+
+
+ class RuleBasedRouter(RouterChain):
+     rule_function: Callable[..., str]
+     input_variables: List[str]
+
+     @property
+     def input_keys(self):
+         return self.input_variables
+
+     def _validate_outputs(self, outputs: Dict[str, Any]) -> None:
+         super()._validate_outputs(outputs)
+         if not isinstance(outputs['next_inputs'], dict):
+             raise ValueError
+
+     def _call(
+         self,
+         inputs: Union[Dict[str, Any], Any],
+     ) -> Route:
+         result = self.rule_function(inputs)
+         if not result.get('destination') or not result:
+             return Route(None, result['next_inputs'])
+         return Route(result['destination'], result['next_inputs'])
+
+     def route(
+         self,
+         inputs: Union[Dict[str, Any], Any],
+         callbacks: Callbacks = None,
+     ) -> Route:
+         result = self.rule_function(inputs)
+         if not result.get('destination') or not result:
+             return Route(None, result['next_inputs'])
+         return Route(result['destination'], result['next_inputs'])
+
+     async def aroute(
+         self,
+         inputs: Union[Dict[str, Any], Any],
+         callbacks: Callbacks = None,
+     ) -> Route:
+         result = await self.rule_function(inputs)
+         if not result.get('destination') or not result:
+             return Route(None, result['next_inputs'])
+         return Route(result['destination'], result['next_inputs'])
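
Not shown in the diff: a minimal sketch of how the two new router classes compose. The rule function returns a dict with `destination` and `next_inputs`, mirroring langchain's `Route`; the `TransformChain` destinations and every name below are illustrative stand-ins, not part of the package.

```python
from bisheng_langchain.chains import MultiRuleChain, RuleBasedRouter
from langchain.chains import TransformChain

def rule(inputs: dict) -> dict:
    # Route to 'zh' when the query contains CJK characters; a falsy
    # destination makes RuleBasedRouter return Route(None, ...), which
    # MultiRouteChain sends to the default chain.
    dest = 'zh' if any('\u4e00' <= ch <= '\u9fff' for ch in inputs['query']) else None
    return {'destination': dest, 'next_inputs': inputs}

zh_chain = TransformChain(input_variables=['query'], output_variables=['text'],
                          transform=lambda d: {'text': '[zh] ' + d['query']})
default_chain = TransformChain(input_variables=['query'], output_variables=['text'],
                               transform=lambda d: {'text': '[default] ' + d['query']})

router = RuleBasedRouter(rule_function=rule, input_variables=['query'])
chain = MultiRuleChain(router_chain=router,
                       destination_chains={'zh': zh_chain},
                       default_chain=default_chain,
                       output_variables=['text'])

print(chain.run('你好'))   # -> '[zh] 你好'
print(chain.run('hello'))  # -> '[default] hello'
```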

--- a/bisheng_langchain/chat_models/host_llm.py
+++ b/bisheng_langchain/chat_models/host_llm.py
@@ -379,7 +379,7 @@ class HostQwenChat(BaseHostChatLLM):
      model_name: str = Field('Qwen-7B-Chat', alias='model')

      temperature: float = 0
-     top_p: float = 0.5
+     top_p: float = 1
      max_tokens: int = 8192

      @property
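
The only change here is the `top_p` default moving from 0.5 to 1. A hedged note for callers: pin the old value explicitly if you depended on it. The sketch below omits whatever connection fields `BaseHostChatLLM` requires, since this diff does not show them.

```python
from bisheng_langchain.chat_models.host_llm import HostQwenChat

# Keep the pre-0.2.0 sampling behaviour; other required fields
# (host endpoint, etc.) are omitted in this sketch.
llm = HostQwenChat(model='Qwen-7B-Chat', top_p=0.5)
```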

--- a/bisheng_langchain/chat_models/proxy_llm.py
+++ b/bisheng_langchain/chat_models/proxy_llm.py
@@ -194,10 +194,14 @@ class ProxyChatLLM(BaseChatModel):
                  'function_call': kwargs.get('function_call', None),
                  'functions': kwargs.get('functions', [])
              }
-             response = self.client.post(self.elemai_base_url, data=params)
+             response = self.client.post(self.elemai_base_url, json=params)
              return response.json()

-         return _completion_with_retry(**kwargs)
+         rsp_dict = _completion_with_retry(**kwargs)
+         if 200 != rsp_dict.get('status_code'):
+             logger.error(f"proxy_llm_error resp={rsp_dict}")
+             raise Exception(rsp_dict)
+         return rsp_dict

      def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict:
          overall_token_usage: dict = {}
@@ -233,7 +237,7 @@ class ProxyChatLLM(BaseChatModel):
          @retry_decorator
          async def _acompletion_with_retry(**kwargs: Any) -> Any:
              # Use OpenAI's async api https://github.com/openai/openai-python#async-api
-             async with self.client.apost(url=self.elemai_base_url, data=kwargs) as response:
+             async with self.client.apost(url=self.elemai_base_url, json=kwargs) as response:
                  async for txt in response.content.iter_any():
                      if b'\n' in txt:
                          for txt_ in txt.split(b'\n'):
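
Previously a non-200 proxy response flowed straight into message parsing; now `_completion_with_retry` logs it and raises with the whole response dict. A hedged sketch of handling the new failure mode (`ProxyChatLLM` construction is elided because its required fields are not shown in this diff):

```python
from langchain.schema import HumanMessage

def safe_chat(chat, text: str):
    """chat: a configured ProxyChatLLM instance (construction elided)."""
    try:
        return chat([HumanMessage(content=text)])
    except Exception as exc:
        # Since 0.2.0rc0 the exception carries the full response dict,
        # e.g. {'status_code': 500, ...}, logged as proxy_llm_error.
        print('proxy call failed:', exc)
        return None
```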

--- a/bisheng_langchain/document_loaders/__init__.py
+++ b/bisheng_langchain/document_loaders/__init__.py
@@ -1,3 +1,4 @@
+ from .custom_kv import CustomKVLoader
  from .elem_pdf import PDFWithSemanticLoader
  from .elem_unstrcutured_loader import ElemUnstructuredLoader, ElemUnstructuredLoaderV0
  from .universal_kv import UniversalKVLoader
@@ -7,4 +8,5 @@ __all__ = [
      'ElemUnstructuredLoader',
      'ElemUnstructuredLoaderV0',
      'UniversalKVLoader',
+     'CustomKVLoader',
  ]

--- /dev/null
+++ b/bisheng_langchain/document_loaders/custom_kv.py
@@ -0,0 +1,166 @@
+ # flake8: noqa
+ """Loads PDF with semantic splilter."""
+ import base64
+ import json
+ import logging
+ import os
+ import re
+ import tempfile
+ from pathlib import Path
+ from time import sleep
+ from typing import List, Optional, Tuple, Union
+ from urllib.parse import quote_plus, unquote, urlparse
+
+ import cv2
+ import fitz
+ import numpy as np
+ from bisheng_langchain.utils.requests import Requests
+ from langchain.docstore.document import Document
+ from langchain.document_loaders.base import BaseLoader
+ from PIL import Image
+
+ logger = logging.getLogger(__name__)
+
+ def convert_base64(image):
+     image_binary = cv2.imencode('.jpg', image)[1].tobytes()
+     x = base64.b64encode(image_binary)
+     return x.decode('ascii').replace('\n', '')
+
+
+ def transpdf2png(pdf_file):
+     pdf_bytes = open(pdf_file, 'rb').read()
+     pdf = fitz.Document(stream=pdf_bytes, filetype='pdf')
+     dpis = [72, 144, 200]
+
+     pdf_images = dict()
+     for page in pdf:
+         pix = None
+         for dpi in dpis:
+             pix = page.get_pixmap(dpi=dpi)
+             if min(pix.width, pix.height) >= 1600: break
+
+         mode = 'RGBA' if pix.alpha else 'RGB'
+         img = Image.frombytes(mode, [pix.width, pix.height], pix.samples)
+         # RGB to BGR
+         img = np.array(img)[:, :, ::-1]
+         img_name = 'page_{:03d}'.format(page.number)
+         pdf_images[img_name] = img
+
+     return pdf_images
+
+
+ class CustomKVLoader(BaseLoader):
+     """Extract key-value from pdf or image.
+     """
+     def __init__(self, file_path: str,
+                  elm_api_base_url: str,
+                  elm_api_key: str,
+                  schemas: str,
+                  elem_server_id: str,
+                  task_type: str,
+                  request_timeout: Optional[Union[float, Tuple[float, float]]] = 30) -> None:
+         """Initialize with a file path."""
+         self.file_path = file_path
+         self.elm_api_base_url = elm_api_base_url
+         self.elm_api_key = elm_api_key
+         self.elem_server_id = elem_server_id
+         self.task_type = task_type
+         self.schemas = set(schemas.split('|'))
+         self.headers = {'Authorization': f'Bearer {elm_api_key}'}
+         self.requests = Requests(headers=self.headers,
+                                  request_timeout=request_timeout)
+         if '~' in self.file_path:
+             self.file_path = os.path.expanduser(self.file_path)
+
+         # If the file is a web path, download it to a temporary file, and use that
+         if not os.path.isfile(self.file_path) and self._is_valid_url(self.file_path):
+             r = self.requests.get(self.file_path)
+
+             if r.status_code != 200:
+                 raise ValueError(
+                     'Check the url of your file; returned status code %s'
+                     % r.status_code
+                 )
+
+             self.temp_dir = tempfile.TemporaryDirectory()
+             temp_file = Path(self.temp_dir.name) / unquote(urlparse(self.file_path
+                                                                     ).path.split('/')[-1])
+             with open(temp_file, mode='wb') as f:
+                 f.write(r.content)
+             self.file_path = str(temp_file)
+         elif not os.path.isfile(self.file_path):
+             raise ValueError('File path %s is not a valid file or url' % self.file_path)
+         super().__init__()
+
+     @staticmethod
+     def _is_valid_url(url: str) -> bool:
+         """Check if the url is valid."""
+         parsed = urlparse(url)
+         return bool(parsed.netloc) and bool(parsed.scheme)
+
+     def load(self) -> List[Document]:
+         """Load given path as pages."""
+         # mime_type = filetype.guess(self.file_path).mime
+         # if mime_type.endswith('pdf'):
+         #     file_type = 'pdf'
+         # elif mime_type.startswith('image'):
+         #     file_type = 'img'
+
+         # else:
+         #     raise ValueError(f'file type {file_type} is not support.')
+         file = {'file': open(self.file_path, 'rb')}
+         result = {}
+         if self.task_type == 'extraction-job':
+             url = self.elm_api_base_url + '/task'
+             # create the task
+             body = {'scene_id': self.elem_server_id}
+         elif self.task_type == 'logic-job':
+             url = self.elm_api_base_url + '/logic-job'
+             body = {'logic_service_id': self.elem_server_id}
+
+         resp = self.requests.post(url=url, json={}, data=body, files=file)
+         if resp.status_code == 200:
+             task_id = resp.json().get('data').get('task_id')
+             if not task_id:
+                 logger.error(f'task_create_fail res={resp.text}')
+                 return
+             # get status
+             status_url = url + f'/status?task_id={task_id}'
+             count = 0
+             while True:
+                 status = self.requests.get(status_url).json()
+                 if 1 == status.get('data').get('status'):
+                     count += 1
+                     sleep(2)
+                 elif 3 == status.get('data').get('status'):
+                     # failed
+                     logger.error(f'custom_kv type={self.task_type} resp={status}')
+                     return []
+                 else:
+                     break
+             # get result
+             job_id = 'job_id' if self.task_type == 'logic-job' else 'task_id'
+             match = re.match(r'^(?:https?:\/\/)?(?:www\.)?([^\/\n]+)', self.elm_api_base_url)
+             detail_url = quote_plus(match.group()+f'/logic-job-detail/{task_id}')
+             result_url = url + f'/result?{job_id}={task_id}&detail_url={detail_url}'
+             result = self.requests.get(result_url).json()
+             # only for independent key
+             document_result = {}
+             try:
+                 result = self.requests.get(result_url).json()
+                 file_reuslt = result.get('data')
+                 for result in file_reuslt:
+                     independent = result.get('result').get('independent_list')
+                     for element in independent:
+                         if element.get('element_name') in self.schemas:
+                             document_result[element.get('element_name')] = [el.get('words')
+                                                                             for el in element.get('entity_list')]
+             except Exception as e:
+                 logger.error(f'task_result_error scene_id={self.elem_server_id} res={result} except={str(e)}')
+                 raise Exception('custom_kv parse_error')
+         else:
+             logger.error(f'custom_kv=create_task resp={resp.text}')
+             raise Exception('custom_kv create task file')
+         content = json.dumps(document_result)
+         doc = Document(page_content=content)
+         return [doc]
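
A minimal usage sketch for the new loader; the path, URL, API key, and ids below are placeholders. `schemas` is a `'|'`-separated list of element names to keep, and `task_type` takes the two values `load()` branches on:

```python
from bisheng_langchain.document_loaders import CustomKVLoader

loader = CustomKVLoader(
    file_path='./contract.pdf',                         # local path or URL
    elm_api_base_url='https://elm.example.com/api/v1',  # placeholder
    elm_api_key='sk-...',                               # placeholder
    schemas='party_a|party_b|sign_date',                # keys to extract
    elem_server_id='scene-123',                         # placeholder
    task_type='extraction-job',                         # or 'logic-job'
)
docs = loader.load()  # one Document; page_content is a JSON object of key -> values
```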

--- a/bisheng_langchain/document_loaders/elem_unstrcutured_loader.py
+++ b/bisheng_langchain/document_loaders/elem_unstrcutured_loader.py
@@ -1,31 +1,15 @@
  # flake8: noqa
  """Loads PDF with semantic splilter."""
  import base64
- import io
- import json
  import logging
  import os
- import re
- import tempfile
- import time
- from abc import ABC
- from collections import Counter
- from copy import deepcopy
- from pathlib import Path
- from typing import Any, Iterator, List, Mapping, Optional, Union
- from urllib.parse import urlparse
-
- import fitz
- import numpy as np
- import pypdfium2
+ from typing import List
+
  import requests
- from bisheng_langchain.document_loaders.parsers import LayoutParser
  from langchain.docstore.document import Document
- from langchain.document_loaders.blob_loaders import Blob
  from langchain.document_loaders.pdf import BasePDFLoader
- from shapely import Polygon
- from shapely import box as Rect

+ logger = logging.getLogger(__name__)

  def merge_partitions(partitions):
      text_elem_sep = '\n'
@@ -112,9 +96,7 @@ class ElemUnstructuredLoader(BasePDFLoader):


  class ElemUnstructuredLoaderV0(BasePDFLoader):
-     """Loads a PDF with pypdf and chunks at character level. dummy version
-
-     Loader also stores page numbers in metadata.
+     """The appropriate parser is automatically selected based on the file format and OCR is supported
      """
      def __init__(self,
                   file_name : str,
@@ -143,6 +125,9 @@ class ElemUnstructuredLoaderV0(BasePDFLoader):
                               headers=self.headers,
                               json=payload).json()

+         if 200!=resp.get('status_code'):
+             logger.info(f'not return resp={resp}')
+
          page_content = resp['text']
          meta = {'source': self.file_name}
          doc = Document(page_content=page_content, metadata=meta)

--- a/bisheng_langchain/document_loaders/universal_kv.py
+++ b/bisheng_langchain/document_loaders/universal_kv.py
@@ -1,30 +1,19 @@
  # flake8: noqa
  """Loads PDF with semantic splilter."""
  import base64
- import io
  import json
- import logging
  import os
- import re
- import tempfile
- import time
- import filetype
- import cv2
  from collections import defaultdict
- from abc import ABC
- from collections import Counter
- from copy import deepcopy
- from pathlib import Path
- from typing import Any, Iterator, List, Mapping, Optional, Union
- from urllib.parse import urlparse
- from PIL import Image
+ from typing import List

+ import cv2
+ import filetype
  import fitz
  import numpy as np
- import requests
+ from bisheng_langchain.document_loaders.parsers import ELLMClient
  from langchain.docstore.document import Document
  from langchain.document_loaders.base import BaseLoader
- from bisheng_langchain.document_loaders.parsers import ELLMClient, OCRClient
+ from PIL import Image


  def convert_base64(image):
@@ -45,11 +34,11 @@ def transpdf2png(pdf_file):
              pix = page.get_pixmap(dpi=dpi)
              if min(pix.width, pix.height) >= 1600: break

-         mode = "RGBA" if pix.alpha else "RGB"
+         mode = 'RGBA' if pix.alpha else 'RGB'
          img = Image.frombytes(mode, [pix.width, pix.height], pix.samples)
          # RGB to BGR
          img = np.array(img)[:, :, ::-1]
-         img_name = "page_{:03d}".format(page.number)
+         img_name = 'page_{:03d}'.format(page.number)
          pdf_images[img_name] = img

      return pdf_images
@@ -79,7 +68,7 @@ class UniversalKVLoader(BaseLoader):
          elif mime_type.startswith('image'):
              file_type = 'img'
          else:
-             raise ValueError(f"file type {file_type} is not support.")
+             raise ValueError(f'file type {file_type} is not support.')

          if file_type == 'img':
              bytes_data = open(self.file_path, 'rb').read()
@@ -90,7 +79,7 @@ class UniversalKVLoader(BaseLoader):
              if 'code' in resp and resp['code'] == 200:
                  key_values = resp['result']['ellm_result']
              else:
-                 raise ValueError(f"universal kv load failed: {resp}")
+                 raise ValueError(f'universal kv load failed: {resp}')

              kv_results = defaultdict(list)
              for key, value in key_values.items():
@@ -118,7 +107,7 @@ class UniversalKVLoader(BaseLoader):
              if 'code' in resp and resp['code'] == 200:
                  key_values = resp['result']['ellm_result']
              else:
-                 raise ValueError(f"universal kv load failed: {resp}")
+                 raise ValueError(f'universal kv load failed: {resp}')

              for key, value in key_values.items():
                  kv_results[key].extend(value['text'])
@@ -128,4 +117,3 @@ class UniversalKVLoader(BaseLoader):
          meta = {'source': file_name}
          doc = Document(page_content=content, metadata=meta)
          return [doc]
-

--- /dev/null
+++ b/bisheng_langchain/input_output/__init__.py
@@ -0,0 +1,4 @@
+ from .input import InputFileNode, InputNode, VariableNode
+ from .output import Report
+
+ __all__ = ['InputNode', 'InputFileNode', 'Report', 'VariableNode']

--- a/bisheng_langchain/input_output/input.py
+++ b/bisheng_langchain/input_output/input.py
@@ -1,23 +1,48 @@

- from typing import List
+ from typing import List, Optional

  from pydantic import BaseModel, Extra


- class Variable(BaseModel):
+ class InputNode(BaseModel):
+     """Input component, used to control the input."""
+     input: Optional[List[str]]
+
+     def text(self):
+         return self.input
+
+
+ class VariableNode(BaseModel):
      """Used to set variables."""
-     variable_name: str
-     variable_key: str
+     # keys
+     variables: Optional[List[str]]
+     # values
+     variable_value: Optional[List[str]] = []

      class Config:
          """Configuration for this pydantic object."""

          extra = Extra.forbid

-     @classmethod
-     def initialize(cls, input: List[str] = None):
-         return input if input else ''
+     def text(self):
+         if self.variable_value:
+             text = {}
+             for index, value in enumerate(self.variable_value):
+                 text[self.variables[index]] = value
+
+             return text
+         else:
+             return {}
+

+ class InputFileNode(BaseModel):
+     file_path: Optional[str]
+     file_name: Optional[str]
+     file_type: Optional[str]  # tips for file
+     """Output component, used to control the output."""

- class PresetQuestion(BaseModel):
-     pass
+     def text(self):
+         # judge if file_path is oss address
+         if not self.file_path:
+             return ''
+         return [self.file_path, self.file_name]
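
The reshaped nodes all expose `text()`. A small illustration of what each returns, using made-up values:

```python
from bisheng_langchain.input_output import InputFileNode, InputNode, VariableNode

InputNode(input=['who are you?']).text()
# -> ['who are you?']

VariableNode(variables=['name', 'date'],
             variable_value=['Alice', '2023-12-01']).text()
# -> {'name': 'Alice', 'date': '2023-12-01'}

InputFileNode(file_path='/tmp/a.pdf', file_name='a.pdf').text()
# -> ['/tmp/a.pdf', 'a.pdf']
```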

--- /dev/null
+++ b/bisheng_langchain/input_output/output.py
@@ -0,0 +1,276 @@
+ import json
+ from typing import Any, Dict, List, Optional
+ from venv import logger
+
+ from bisheng_langchain.chains import LoaderOutputChain
+ from langchain.callbacks.manager import AsyncCallbackManagerForChainRun, CallbackManagerForChainRun
+ from langchain.chains.base import Chain
+ from pydantic import BaseModel, Extra
+
+ _TEXT_COLOR_MAPPING = {
+     'blue': '36;1',
+     'yellow': '33;1',
+     'pink': '38;5;200',
+     'green': '32;1',
+     'red': '31;1',
+ }
+
+
+ def get_color_mapping(
+     items: List[str], excluded_colors: Optional[List] = None
+ ) -> Dict[str, str]:
+     """Get mapping for items to a support color."""
+     colors = list(_TEXT_COLOR_MAPPING.keys())
+     if excluded_colors is not None:
+         colors = [c for c in colors if c not in excluded_colors]
+     color_mapping = {item: colors[i % len(colors)] for i, item in enumerate(items)}
+     return color_mapping
+
+
+ class Output(BaseModel):
+     """Output component, used to control the output."""
+
+     @classmethod
+     def initialize(cls, file_path: str = None):
+         return file_path if file_path else ''
+
+
+ class Report(Chain):
+     # ```
+     # chain Dict:
+     #     object: langchain_object
+     #     node_id: object_key prefix
+     #     input: triger query
+     # variables Dict:
+     #     variable_name: name
+     #     variable_value: value
+     # `
+     chains: Optional[List[Dict]]
+     variables: Optional[List[Dict]]
+     report_name: str
+     stop_flag: bool = False
+
+     input_key: str = 'report_name' #: :meta private:
+     output_key: str = 'text' #: :meta private:
+
+     class Config:
+         """Configuration for this pydantic object."""
+         extra = Extra.forbid
+         arbitrary_types_allowed = True
+
+     @property
+     def input_keys(self) -> List[str]:
+         """Expect input key.
+         :meta private:
+         """
+         return [self.input_key]
+
+     @property
+     def output_keys(self) -> List[str]:
+         """Return output key.
+         :meta private:
+         """
+         return [self.output_key]
+
+     def validate_chains(cls, values: Dict) -> Dict:
+         """Validate chains."""
+         if values.get('chains'):
+             for chain in values['chains']:
+                 chain_output_keys = chain['object'].output_keys
+                 if len(chain_output_keys) != 1:
+                     raise ValueError(
+                         'Chain used in Report should all have one output, got '
+                         f"{chain['object']} with {len(chain_output_keys)} outputs."
+                     )
+         return values
+
+     def func_call(self,
+                   inputs: Dict[str, Any],
+                   outputs: Dict[str, Any],
+                   intermedia_stop: list,
+                   chain: Chain,
+                   node_id: str,
+                   run_manager: Optional[CallbackManagerForChainRun] = None,):
+         question = list(inputs.values())[0]
+         _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
+
+         if isinstance(chain, LoaderOutputChain):
+             question = 'Get' + ','.join(question)
+         _run_manager.on_text(text='', log='', type='start', category='question')
+         _run_manager.on_text(text='', log=question, type='end', category='question')
+         _run_manager.on_text(text='', log='', type='start', category='answer')
+         message_reply = {'log': question, 'category': 'question'}
+         intermedia_stop.append(message_reply)
+
+         chain_outputs = chain(inputs, callbacks=_run_manager.get_child())
+         result = (chain_outputs.get(chain.output_keys[0])
+                   if isinstance(chain_outputs, dict) else chain_outputs)
+         if isinstance(chain, LoaderOutputChain):
+             for schema in inputs.values():
+                 result = json.loads(result)
+                 for key in schema:
+                     if result.get(key):
+                         result_str = (';'.join([str(x) for x in result.get(key)])
+                                       if isinstance(result.get(key), list)
+                                       else result.get(key))
+                         outputs.update({node_id+'_'+key: result_str})
+             result = json.dumps(result, ensure_ascii=False)
+         else:
+             outputs.update({node_id: result})
+         message_reply = {'log': result, 'category': 'answer'}
+         intermedia_stop.append(message_reply)
+         _run_manager.on_text(text='', log=result, type='end', category='answer')
+
+     async def func_acall(self,
+                          inputs: Dict[str, Any],
+                          outputs: Dict[str, Any],
+                          intermedia_stop: list,
+                          chain: Chain,
+                          node_id: str,
+                          run_manager: Optional[AsyncCallbackManagerForChainRun] = None,):
+         question = list(inputs.values())[0]
+         _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
+
+         if isinstance(chain, LoaderOutputChain):
+             question = 'Get' + ','.join(question)
+         await _run_manager.on_text(text='', log='', type='start', category='question')
+         await _run_manager.on_text(text='', log=question, type='end', category='question')
+         await _run_manager.on_text(text='', log='', type='start', category='answer')
+         message_reply = {'log': question, 'category': 'question'}
+         intermedia_stop.append(message_reply)
+
+         # process
+         try:
+             chain_outputs = await chain.arun(inputs, callbacks=_run_manager.get_child())
+         except Exception as e:
+             logger.exception(e)
+             await _run_manager.on_text(text='', log=str(e), type='stream', category='processing')
+             try:
+                 chain_outputs = chain(inputs)
+             except Exception as e2:
+                 logger.exception(e2)
+                 await _run_manager.on_text(text='', log=str(e2), type='stream', category='processing')
+                 chain_outputs = ''
+
+         result = (chain_outputs.get(chain.output_keys[0])
+                   if isinstance(chain_outputs, dict) else chain_outputs)
+         if isinstance(chain, LoaderOutputChain):
+             for schema in inputs.values():
+                 result = json.loads(result)
+                 for key in schema:
+                     if result.get(key):
+                         result_str = (';'.join([str(x) for x in result.get(key)])
+                                       if isinstance(result.get(key), list)
+                                       else result.get(key))
+                         outputs.update({node_id+'_'+key: result_str})
+             result = json.dumps(result, ensure_ascii=False)
+         else:
+             outputs.update({node_id: result})
+         message_reply = {'log': result, 'category': 'answer'}
+         intermedia_stop.append(message_reply)
+         await _run_manager.on_text(text='', log=result, type='end', category='answer')
+
+     def _call(
+         self,
+         inputs: Dict[str, Any],
+         run_manager: Optional[CallbackManagerForChainRun] = None,
+         verbose: Optional[bool] = None,
+     ) -> Dict[str, str]:
+         intermedia_steps = []
+         outputs = {}
+         self.stop_flag = False
+         # variables
+         if self.variables and self.variables[0]:
+             for variable in self.variables:
+                 variable_kv = variable['input']
+                 for k, v in variable_kv.items():
+                     outputs.update({variable['node_id']+'_'+k: v})
+
+         if self.chains:
+             for i, chain in enumerate(self.chains):
+                 if 'node_id' not in chain:
+                     logger.info(f"report_skip_nonsence_chain chain={chain['object']}")
+                     continue
+                 if not isinstance(chain['object'], Chain):
+                     raise TypeError(
+                         f"{chain['object']} not be runnable Chain object"
+                     )
+                 if isinstance(chain['object'], LoaderOutputChain):
+                     # loaderchain questions use new parse
+                     self.func_call(chain['input'], outputs, intermedia_steps,
+                                    chain['object'], chain['node_id'], run_manager)
+                     continue
+
+                 preset_question = chain['input']
+                 for k, v in preset_question.items():
+                     # log print
+                     if isinstance(v, str):
+                         self.func_call(preset_question, outputs, intermedia_steps,
+                                        chain['object'], chain['node_id']+'_'+v, run_manager)
+                     else:
+                         for question in v:
+                             question_dict = {k: question}
+                             self.func_call(question_dict, outputs, intermedia_steps,
+                                            chain['object'], chain['node_id']+'_'+question,
+                                            run_manager)
+
+         return {self.output_key: outputs, self.input_key: self.report_name,
+                 'intermediate_steps': intermedia_steps}
+
+     async def _acall(
+         self,
+         inputs: Dict[str, Any],
+         run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
+         verbose: Optional[bool] = None,
+     ) -> Dict[str, Any]:
+         intermedia_steps = []
+         outputs = {}
+         await run_manager.on_text(text='', log='', type='end', category='processing')  # end father start
+         self.stop_flag = False
+         # variables
+         if self.variables and self.variables[0]:
+             for variable in self.variables:
+                 variable_kv = variable['input']
+                 for k, v in variable_kv.items():
+                     outputs.update({variable['node_id']+'_'+k: v})
+
+         # functions
+         if self.chains:
+             for i, chain in enumerate(self.chains):
+                 if 'node_id' not in chain:
+                     logger.info(f"report_skip_nonsence_chain chain={chain['object']}")
+                     continue
+                 if self.stop_flag:
+                     break
+                 if not isinstance(chain['object'], Chain):
+                     raise TypeError(
+                         f"{chain['object']} not be runnable Chain object"
+                     )
+                 if isinstance(chain['object'], LoaderOutputChain):
+                     # loaderchain questions use new parse
+                     await self.func_acall(chain['input'], outputs, intermedia_steps,
+                                           chain['object'], chain['node_id'], run_manager)
+                     continue
+                 # normal chain
+                 preset_question = chain['input']
+                 for k, v in preset_question.items():
+                     if isinstance(v, str):
+                         await self.func_acall(preset_question, outputs, intermedia_steps,
+                                               chain['object'], chain['node_id']+'_'+v, run_manager)
+                     else:
+                         for question in v:
+                             question_dict = {k: question}
+                             await self.func_acall(question_dict, outputs, intermedia_steps,
+                                                   chain['object'], chain['node_id']+'_'+question,
+                                                   run_manager)
+
+         # keep whole process paired
+         await run_manager.on_text(text='', log='', type='start', category='processing')
+         return {self.output_key: outputs, self.input_key: self.report_name,
+                 'intermediate_steps': intermedia_steps}
+
+     def stop(self):
+         self.stop_flag = True
+
+     def stop_status(self):
+         return self.stop_flag
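
A hedged wiring sketch for the new `Report` chain: each `chains` entry pairs a runnable single-output chain with a `node_id` prefix and a preset question. The stub `TransformChain` below stands in for a real QA chain; all names and values are illustrative.

```python
from bisheng_langchain.input_output import Report
from langchain.chains import TransformChain

qa_chain = TransformChain(input_variables=['query'], output_variables=['text'],
                          transform=lambda d: {'text': 'stub answer to ' + d['query']})

report = Report(report_name='quarterly',
                chains=[{'object': qa_chain, 'node_id': 'qa1',
                         'input': {'query': 'What was Q3 revenue?'}}],
                variables=[])

result = report({'report_name': 'quarterly'})
print(result['text'])
# -> {'qa1_What was Q3 revenue?': 'stub answer to What was Q3 revenue?'}
```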

--- a/bisheng_langchain/utils/requests.py
+++ b/bisheng_langchain/utils/requests.py
@@ -33,28 +33,28 @@ class Requests(BaseModel):
                             timeout=self.request_timeout,
                             **kwargs)

-     def post(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response:
+     def post(self, url: str, json: Dict[str, Any], **kwargs: Any) -> requests.Response:
          """POST to the URL and return the text."""
          return requests.post(url,
-                              json=data,
+                              json=json,
                               headers=self.headers,
                               auth=self.auth,
                               timeout=self.request_timeout,
                               **kwargs)

-     def patch(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response:
+     def patch(self, url: str, json: Dict[str, Any], **kwargs: Any) -> requests.Response:
          """PATCH the URL and return the text."""
          return requests.patch(url,
-                               json=data,
+                               json=json,
                                headers=self.headers,
                                auth=self.auth,
                                timeout=self.request_timeout,
                                **kwargs)

-     def put(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response:
+     def put(self, url: str, json: Dict[str, Any], **kwargs: Any) -> requests.Response:
          """PUT the URL and return the text."""
          return requests.put(url,
-                             json=data,
+                             json=json,
                              headers=self.headers,
                              auth=self.auth,
                              timeout=self.request_timeout,
@@ -98,24 +98,24 @@ class Requests(BaseModel):
              yield response

      @asynccontextmanager
-     async def apost(self, url: str, data: Dict[str, Any],
+     async def apost(self, url: str, json: Dict[str, Any],
                      **kwargs: Any) -> AsyncGenerator[aiohttp.ClientResponse, None]:
          """POST to the URL and return the text asynchronously."""
-         async with self._arequest('POST', url, json=data, auth=self.auth, **kwargs) as response:
+         async with self._arequest('POST', url, json=json, auth=self.auth, **kwargs) as response:
              yield response

      @asynccontextmanager
-     async def apatch(self, url: str, data: Dict[str, Any],
+     async def apatch(self, url: str, json: Dict[str, Any],
                       **kwargs: Any) -> AsyncGenerator[aiohttp.ClientResponse, None]:
          """PATCH the URL and return the text asynchronously."""
-         async with self._arequest('PATCH', url, json=data, auth=self.auth, **kwargs) as response:
+         async with self._arequest('PATCH', url, json=json, auth=self.auth, **kwargs) as response:
              yield response

      @asynccontextmanager
-     async def aput(self, url: str, data: Dict[str, Any],
+     async def aput(self, url: str, json: Dict[str, Any],
                     **kwargs: Any) -> AsyncGenerator[aiohttp.ClientResponse, None]:
          """PUT the URL and return the text asynchronously."""
-         async with self._arequest('PUT', url, json=data, auth=self.auth, **kwargs) as response:
+         async with self._arequest('PUT', url, json=json, auth=self.auth, **kwargs) as response:
              yield response

      @asynccontextmanager
@@ -150,17 +150,17 @@ class TextRequestsWrapper(BaseModel):
          """GET the URL and return the text."""
          return self.requests.get(url, **kwargs).text

-     def post(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str:
+     def post(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
          """POST to the URL and return the text."""
-         return self.requests.post(url, data, **kwargs).text
+         return self.requests.post(url, json, **kwargs).text

-     def patch(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str:
+     def patch(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
          """PATCH the URL and return the text."""
-         return self.requests.patch(url, data, **kwargs).text
+         return self.requests.patch(url, json, **kwargs).text

-     def put(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str:
+     def put(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
          """PUT the URL and return the text."""
-         return self.requests.put(url, data, **kwargs).text
+         return self.requests.put(url, json, **kwargs).text

      def delete(self, url: str, **kwargs: Any) -> str:
          """DELETE the URL and return the text."""
@@ -171,19 +171,19 @@ class TextRequestsWrapper(BaseModel):
          async with self.requests.aget(url, **kwargs) as response:
              return await response.text()

-     async def apost(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str:
+     async def apost(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
          """POST to the URL and return the text asynchronously."""
-         async with self.requests.apost(url, data, **kwargs) as response:
+         async with self.requests.apost(url, json, **kwargs) as response:
              return await response.text()

-     async def apatch(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str:
+     async def apatch(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
          """PATCH the URL and return the text asynchronously."""
-         async with self.requests.apatch(url, data, **kwargs) as response:
+         async with self.requests.apatch(url, json, **kwargs) as response:
              return await response.text()

-     async def aput(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str:
+     async def aput(self, url: str, json: Dict[str, Any], **kwargs: Any) -> str:
          """PUT the URL and return the text asynchronously."""
-         async with self.requests.aput(url, data, **kwargs) as response:
+         async with self.requests.aput(url, json, **kwargs) as response:
              return await response.text()

      async def adelete(self, url: str, **kwargs: Any) -> str:
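
Across `Requests` and `TextRequestsWrapper`, the mutating verbs now name the parameter `json` to match how the body was always sent. A short sketch of the new call shape; the URL and token are placeholders, and the remaining `Requests` fields are assumed to keep their defaults:

```python
from bisheng_langchain.utils.requests import Requests

client = Requests(headers={'Authorization': 'Bearer sk-...'})  # placeholder token
resp = client.post('https://api.example.com/v1/chat', json={'query': 'hi'})
print(resp.status_code, resp.json())
```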

--- a/bisheng_langchain/vectorstores/__init__.py
+++ b/bisheng_langchain/vectorstores/__init__.py
@@ -1,3 +1,4 @@
  from .elastic_keywords_search import ElasticKeywordsSearch
+ from .retriever import VectorStoreFilterRetriever

- __all__ = ['ElasticKeywordsSearch']
+ __all__ = ['ElasticKeywordsSearch', 'VectorStoreFilterRetriever']

bisheng_langchain/vectorstores/milvus.py — added as an empty file (no content to diff; inferred from the RECORD entries below)

--- /dev/null
+++ b/bisheng_langchain/vectorstores/retriever.py
@@ -0,0 +1,111 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, ClassVar, Collection, Dict, List
+ from venv import logger
+
+ import requests
+ from langchain.schema.document import Document
+ from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
+ from pydantic import Field, root_validator
+
+ if TYPE_CHECKING:
+     from langchain.callbacks.manager import (
+         AsyncCallbackManagerForRetrieverRun,
+         CallbackManagerForRetrieverRun,
+     )
+
+
+ class VectorStoreFilterRetriever(VectorStoreRetriever):
+     vectorstore: VectorStore
+     search_type: str = 'similarity'
+     search_kwargs: dict = Field(default_factory=dict)
+     allowed_search_types: ClassVar[Collection[str]] = (
+         'similarity',
+         'similarity_score_threshold',
+         'mmr',
+     )
+     access_url: str = None
+
+     class Config:
+         """Configuration for this pydantic object."""
+
+         arbitrary_types_allowed = True
+
+     @root_validator()
+     def validate_search_type(cls, values: Dict) -> Dict:
+         """Validate search type."""
+         search_type = values['search_type']
+         if search_type not in cls.allowed_search_types:
+             raise ValueError(
+                 f'search_type of {search_type} not allowed. Valid values are: '
+                 f'{cls.allowed_search_types}'
+             )
+         if search_type == 'similarity_score_threshold':
+             score_threshold = values['search_kwargs'].get('score_threshold')
+             if (score_threshold is None) or (not isinstance(score_threshold, float)):
+                 raise ValueError(
+                     '`score_threshold` is not specified with a float value(0~1) '
+                     'in `search_kwargs`.'
+                 )
+         return values
+
+     def _get_relevant_documents(
+         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
+     ) -> List[Document]:
+         if self.search_type == 'similarity':
+             docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
+         elif self.search_type == 'similarity_score_threshold':
+             docs_and_similarities = (
+                 self.vectorstore.similarity_search_with_relevance_scores(
+                     query, **self.search_kwargs
+                 )
+             )
+             docs = [doc for doc, _ in docs_and_similarities]
+         elif self.search_type == 'mmr':
+             docs = self.vectorstore.max_marginal_relevance_search(
+                 query, **self.search_kwargs
+             )
+         else:
+             raise ValueError(f'search_type of {self.search_type} not allowed.')
+
+         return self.get_file_access(docs)
+
+     async def _aget_relevant_documents(
+         self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
+     ) -> List[Document]:
+         if self.search_type == 'similarity':
+             docs = await self.vectorstore.asimilarity_search(
+                 query, **self.search_kwargs
+             )
+         elif self.search_type == 'similarity_score_threshold':
+             docs_and_similarities = (
+                 await self.vectorstore.asimilarity_search_with_relevance_scores(
+                     query, **self.search_kwargs
+                 )
+             )
+             docs = [doc for doc, _ in docs_and_similarities]
+         elif self.search_type == 'mmr':
+             docs = await self.vectorstore.amax_marginal_relevance_search(
+                 query, **self.search_kwargs
+             )
+         else:
+             raise ValueError(f'search_type of {self.search_type} not allowed.')
+         return self.get_file_access(docs)
+
+     def get_file_access(self, docs: List[Document]):
+         file_ids = [doc.metadata.get('file_id') for doc in docs]
+         if file_ids:
+             res = requests.get(self.access_url, json=file_ids)
+             if res.status_code == 200:
+                 doc_res = res.json().get('data')
+                 doc_right = {doc.get('docid') for doc in doc_res if doc.get('result') == 1}
+                 for doc in docs:
+                     if doc.metadata.get('file_id') not in doc_right:
+                         doc.page_content = ''
+                         doc.metadata['right'] = False
+                 return docs
+             else:
+                 logger.error(f'query_file_access_fail url={self.access_url} res={res.text}')
+                 return [Document(page_content='', metadata={})]
+         else:
+             return docs
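
A hedged sketch of wrapping a vector store with the new permission-filtering retriever; `access_url` is whatever service answers the batched `file_id` rights check, and the URL below is a placeholder:

```python
from bisheng_langchain.vectorstores import VectorStoreFilterRetriever

def build_retriever(store):
    """store: any langchain VectorStore; construction elided."""
    return VectorStoreFilterRetriever(
        vectorstore=store,
        search_type='similarity',
        search_kwargs={'k': 4},
        access_url='http://auth.internal/access',  # placeholder rights service
    )

# docs = build_retriever(store).get_relevant_documents('contract terms')
# Documents whose file_id fails the check come back with empty page_content
# and metadata['right'] = False.
```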

--- a/bisheng_langchain-0.1.10.1.dist-info/METADATA
+++ b/bisheng_langchain-0.2.0rc0.dist-info/METADATA
@@ -1,12 +1,11 @@
  Metadata-Version: 2.1
  Name: bisheng-langchain
- Version: 0.1.10.1
+ Version: 0.2.0rc0
  Summary: bisheng langchain modules
  Home-page: https://github.com/dataelement/bisheng
  Author: DataElem
  Author-email: contact@dataelem.com
  License: Apache 2.0
- Platform: UNKNOWN
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.6
  Classifier: Programming Language :: Python :: 3.7
@@ -20,8 +19,8 @@ Requires-Dist: langchain
  Requires-Dist: zhipuai
  Requires-Dist: websocket-client
  Requires-Dist: elasticsearch
- Requires-Dist: opencv-python ==4.5.5.64
- Requires-Dist: Pillow ==9.5.0
+ Requires-Dist: opencv-python (==4.5.5.64)
+ Requires-Dist: Pillow (==9.5.0)
  Requires-Dist: bisheng-pyautogen

  ## What is bisheng-langchain?
@@ -65,5 +64,3 @@ check out [bisheng-langchain Dev Docs](https://dataelem.feishu.cn/wiki/Xaq8wKQjk
  bisheng-langchain adopts dependencies from the following:

  - Thanks to [langchain](https://github.com/langchain-ai/langchain) for the main framework.
-
-

--- a/bisheng_langchain-0.1.10.1.dist-info/RECORD
+++ b/bisheng_langchain-0.2.0rc0.dist-info/RECORD
@@ -5,17 +5,20 @@ bisheng_langchain/autogen_role/assistant.py,sha256=6gA36zaolvV0MA35kPbdkbdsmmRiH
  bisheng_langchain/autogen_role/custom.py,sha256=WiHK0vTIP0vMDhtKPNR4-A950i0qwLlowJO0v-g0d40,2423
  bisheng_langchain/autogen_role/groupchat_manager.py,sha256=L53RmXyv4QMVuw1zGoXRe8hB2s39DVUEb_zASWoO_yI,1746
  bisheng_langchain/autogen_role/user.py,sha256=Hwp2bu5_GJPMjZJnMLePxBtl7nizOCW0KtKPyu49DQ0,4953
- bisheng_langchain/chains/__init__.py,sha256=uba9Igpyk83fEp4bWYSWOAlwS-U5RABNLQv8H8ZugWU,266
+ bisheng_langchain/chains/__init__.py,sha256=zd1FQGN0G5b_52oibBbdHsny0Y4T36YQnj6j1tRd4zQ,456
  bisheng_langchain/chains/loader_output.py,sha256=02ZercAFaudStTZ4t7mcVkGRj5pD78HZ6NO8HbmbDH8,1903
  bisheng_langchain/chains/autogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  bisheng_langchain/chains/autogen/auto_gen.py,sha256=QIkfCO9-VN2wRkl3_TWVj-JkdL2dqMQNy93j3uB401s,3270
  bisheng_langchain/chains/combine_documents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  bisheng_langchain/chains/combine_documents/stuff.py,sha256=z_E_wfhJrAYWcNVRPomPm5fGRDI3hqoC52wcMzgzxVA,2369
  bisheng_langchain/chains/question_answering/__init__.py,sha256=RWbSgTQ0IqZhrXkhaJUKzEXurA9NJE7_6P0zLy0IBjs,8636
+ bisheng_langchain/chains/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ bisheng_langchain/chains/router/multi_rule.py,sha256=BiFryj3-7rOxfttD-MyOkKWLCSGB9LVYd2rjOsIfQC8,375
+ bisheng_langchain/chains/router/rule_router.py,sha256=QWLKqJ9ZCQb9E3oh6pd0C6YZRhHyoJSmDFkx31LNpME,1563
  bisheng_langchain/chat_models/__init__.py,sha256=FBNhc-zwFtPmWdNlt0QgzgOx2hVT-He1yOzgKZEBYQE,463
- bisheng_langchain/chat_models/host_llm.py,sha256=HgxWx3AmwwRlvXLimh4ZT1C8C_xAnOMadywqqUJ_gHU,16995
+ bisheng_langchain/chat_models/host_llm.py,sha256=UV-u3XaIQsw1Qz8XCJuADxWd7m8Xpey5fQdnOtEpIgo,16993
  bisheng_langchain/chat_models/minimax.py,sha256=VFq1U4Ax1ZPFrsxzxilRE6NXvUH0HEoK1vDq5kXADTU,13963
- bisheng_langchain/chat_models/proxy_llm.py,sha256=oIH8sGZTK4bklRaSwvIcPCPtAlPdhxSBZqfp7cNmYWo,16463
+ bisheng_langchain/chat_models/proxy_llm.py,sha256=JdxQq7QJjHwmuZ7QsTaZawkWzDESZohKstiwMoqrPDY,16637
  bisheng_langchain/chat_models/wenxin.py,sha256=GRhFFqToVpYwbHz9NQrH4lvVlOQ_9rtQEl6uikgNH_w,13793
  bisheng_langchain/chat_models/xunfeiai.py,sha256=GP30oqpbb8UAxrkJ2l0anDq67Q40CUNulg9nu7PtQZc,14019
  bisheng_langchain/chat_models/zhipuai.py,sha256=v29bsAZoe_5KR2LQVoKE7429U4Qc3FqcZIgAm8p2aX4,14941
@@ -27,12 +30,13 @@ bisheng_langchain/chat_models/interface/utils.py,sha256=qww_uYsWDqK7cLuv-KzZmmlg
  bisheng_langchain/chat_models/interface/wenxin.py,sha256=z_K1Nj78dDYYgiVIzc5sGkOiGr8OAoRwaKwmpWXssH0,4246
  bisheng_langchain/chat_models/interface/xunfei.py,sha256=DPHAZM_uHg0A8GnebgkRbLENhBW7bBtRHzKC0gFKZgc,7514
  bisheng_langchain/chat_models/interface/zhipuai.py,sha256=67Ej6DRk-IlXUfEZPg-pjcYPyqZb32ClrBP-9k-3EEs,2636
- bisheng_langchain/document_loaders/__init__.py,sha256=lkLjlw6zyOu_ekObVYw8O95Z27ZwXu49PvowYWgK4mQ,306
+ bisheng_langchain/document_loaders/__init__.py,sha256=LuQ-zMYxde2FeiEcvVtjQqnHozki5pF_pDDa88_fBdg,366
+ bisheng_langchain/document_loaders/custom_kv.py,sha256=1xGir73LcH6lfIyzy4HX-Rg5bjbd_MWJPpqKDkf29CI,6623
  bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  bisheng_langchain/document_loaders/elem_pdf.py,sha256=64kUITkrTVJe9CH6IAVSdDVcn2Ekx2PM-jT0cdClXlo,22716
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=5r1O4h3uM4EV7OQ8E8-H3wNUoYIVtFNIT5HGT1tz-z8,4980
- bisheng_langchain/document_loaders/universal_kv.py,sha256=RA5OMGZcBCfCrArE8fNuAh6wa4xGwnaXgRj4ebHM9HU,4332
+ bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=qLqDTYA7IRguI0OiQ6PY3HzQCiQnc76Qy3_QHf90BMs,4616
+ bisheng_langchain/document_loaders/universal_kv.py,sha256=dJF_GQGKBMUjB_kX9CSp7xZRhXgwVuGPbMIzJwPh-C0,4063
  bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
  bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=B4Dea8xXXnGvB9j2OXv53HILNUmnWeNJz9ssNM-2fLM,1760
  bisheng_langchain/document_loaders/parsers/image.py,sha256=7Vx4dD_WiSTojS4TMIJFxfE8nvze0kwNnwTd6f1cLds,938
@@ -44,16 +48,18 @@ bisheng_langchain/embeddings/wenxin.py,sha256=8kYqWuHydx5Cylb_Lmdti0YLHrOM1Qha3e
  bisheng_langchain/embeddings/interface/__init__.py,sha256=GNY3tibpRxpAdAfSvQmXBKo0xKSLke_9y4clofi_WOE,98
  bisheng_langchain/embeddings/interface/types.py,sha256=VdurbtsnjCPdlOjPFcK2Mg6r9bJYYHb3tepvkk-y3nM,461
  bisheng_langchain/embeddings/interface/wenxin.py,sha256=5d9gI4enmfkD80s0FHKiDt33O0mwM8Xc5WTubnMUy8c,3104
- bisheng_langchain/input_output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- bisheng_langchain/input_output/input.py,sha256=Na2-DLeLym6Phuh8RIQk8N-xWcFEiRzHkhyeKD429cg,420
- bisheng_langchain/input_output/output.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ bisheng_langchain/input_output/__init__.py,sha256=sW_GB7MlrHYsqY1Meb_LeimQqNsMz1gH-00Tqb2BUyM,153
+ bisheng_langchain/input_output/input.py,sha256=I5YDmgbvvj1o2lO9wi8LE37wM0wP5jkhUREU32YrZMQ,1094
+ bisheng_langchain/input_output/output.py,sha256=6U-az6-Cwz665C2YmcH3SYctWVjPFjmW8s70CA_qphk,11585
  bisheng_langchain/retrievers/__init__.py,sha256=TcyK31IMgFJcYaOCLd9O6qFzXt1VMbtLs-g4C6ml_3w,117
  bisheng_langchain/retrievers/mix_es_vector.py,sha256=-V1IGQJZMar2a35FrXf5MaHV_R8TpYdF8r1jIR-JDjA,3973
  bisheng_langchain/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- bisheng_langchain/utils/requests.py,sha256=WmlHmV6pw5InxXSKNiIVc1kvcwt28CJadA6EY6XgrqA,8397
- bisheng_langchain/vectorstores/__init__.py,sha256=K3xQouSGl05Q0ehFCKZafip-35NzCrv8SCANvfxDpKE,96
+ bisheng_langchain/utils/requests.py,sha256=abZvV6-p3cyJpFtFzcvzCUGRYh17Bjdy2BRElG7zPL4,8397
+ bisheng_langchain/vectorstores/__init__.py,sha256=3xXjqJj_DwgQcQUC1MpPrYYhSKeg4yk67OtzmVAjco0,176
  bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=UU3yJoSSH-prBJtoWQR8dYQzrSOLSvVBRdU45QtE7KA,11648
- bisheng_langchain-0.1.10.1.dist-info/METADATA,sha256=1PnMWVLeB95JJ7x20O4FyWPlNiytyisujisMIIEqH_E,2141
- bisheng_langchain-0.1.10.1.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
- bisheng_langchain-0.1.10.1.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
- bisheng_langchain-0.1.10.1.dist-info/RECORD,,
+ bisheng_langchain/vectorstores/milvus.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ bisheng_langchain/vectorstores/retriever.py,sha256=wq__1xRN3PfAgU8Kh8Am8iEsUkkVuJnhWTY6GFDAADg,4365
+ bisheng_langchain-0.2.0rc0.dist-info/METADATA,sha256=6KiZWWBaZ9hv4pkDlbXM-62q5gA65O3onYng4EHl4Ek,2125
+ bisheng_langchain-0.2.0rc0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ bisheng_langchain-0.2.0rc0.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+ bisheng_langchain-0.2.0rc0.dist-info/RECORD,,

--- a/bisheng_langchain-0.1.10.1.dist-info/WHEEL
+++ b/bisheng_langchain-0.2.0rc0.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.41.3)
+ Generator: bdist_wheel (0.38.4)
  Root-Is-Purelib: true
  Tag: py3-none-any