bisheng-langchain 0.3.3b0__py3-none-any.whl → 0.3.3.dev2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- bisheng_langchain/chains/qa_generation/base_v2.py +45 -15
- bisheng_langchain/chat_models/proxy_llm.py +1 -1
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +21 -10
- bisheng_langchain/gpts/assistant.py +1 -1
- bisheng_langchain/gpts/prompts/assistant_prompt_base.py +19 -1
- bisheng_langchain/gpts/prompts/react_agent_prompt.py +4 -4
- bisheng_langchain/rag/init_retrievers/keyword_retriever.py +1 -1
- bisheng_langchain/utils/azure_dalle_image_generator.py +140 -0
- bisheng_langchain/vectorstores/milvus.py +70 -68
- {bisheng_langchain-0.3.3b0.dist-info → bisheng_langchain-0.3.3.dev2.dist-info}/METADATA +1 -1
- {bisheng_langchain-0.3.3b0.dist-info → bisheng_langchain-0.3.3.dev2.dist-info}/RECORD +13 -12
- {bisheng_langchain-0.3.3b0.dist-info → bisheng_langchain-0.3.3.dev2.dist-info}/WHEEL +0 -0
- {bisheng_langchain-0.3.3b0.dist-info → bisheng_langchain-0.3.3.dev2.dist-info}/top_level.txt +0 -0
bisheng_langchain/chains/qa_generation/base_v2.py

````diff
@@ -10,6 +10,7 @@ from collections import defaultdict, namedtuple
 from dataclasses import dataclass
 from langchain_core.callbacks import CallbackManagerForChainRun
 from langchain_core.language_models import BaseLanguageModel
+from langchain_core.prompts import HumanMessagePromptTemplate, PromptTemplate

 try:
     from llama_index.node_parser import SimpleNodeParser
@@ -133,6 +134,9 @@ class TrainsetGenerator:
         chunk_size: int = 1024,
         seed: int = 42,
         prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
+        filter_lowquality_context: bool = False,
+        filter_lowquality_question: bool = False,
+        answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
     ) -> None:
         self.generator_llm = generator_llm
         self.critic_llm = critic_llm
@@ -150,6 +154,11 @@ class TrainsetGenerator:
         self.threshold = 5.0
         self.rng = default_rng(seed)
         self.prompt = prompt
+        self.filter_lowquality_context = filter_lowquality_context
+        self.filter_lowquality_question = filter_lowquality_question
+        if answer_prompt is None:
+            answer_prompt = ANSWER_FORMULATE
+        self.answer_prompt = answer_prompt

     @classmethod
     def from_default(
@@ -158,6 +167,9 @@ class TrainsetGenerator:
         chunk_size: int = 512,
         trainset_distribution: dict = DEFAULT_TRAIN_DISTRIBUTION,
         prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
+        filter_lowquality_context: bool = False,
+        filter_lowquality_question: bool = False,
+        answer_prompt: Optional[PromptTemplate] = ANSWER_FORMULATE,
     ):
         generator_llm = llm
         critic_llm = llm
@@ -167,6 +179,9 @@ class TrainsetGenerator:
             chunk_size=chunk_size,
             trainset_distribution=trainset_distribution,
             prompt=prompt,
+            filter_lowquality_context=filter_lowquality_context,
+            filter_lowquality_question=filter_lowquality_question,
+            answer_prompt=answer_prompt,
         )

     def _get_evolve_type(self) -> str:
@@ -221,7 +236,7 @@ class TrainsetGenerator:

     def _generate_answer(self, question: str, context: t.List[str]) -> t.List[str]:
         return [
-            self._qc_template(
+            self._qc_template(self.answer_prompt, qstn, context[i])
             for i, qstn in enumerate(question.split("\n"))
         ]
@@ -309,14 +324,17 @@ class TrainsetGenerator:
             )

             text_chunk = " ".join([node.get_content() for node in nodes])
-            score = self._filter_context(text_chunk)
-            if not score:
-                continue
+            if self.filter_lowquality_context:
+                score = self._filter_context(text_chunk)
+                if not score:
+                    continue
             seed_question = self._seed_question(text_chunk)

             question = seed_question
-            is_valid_question = self._filter_question(question)
-
+            if self.filter_lowquality_question:
+                is_valid_question = self._filter_question(question)
+            else:
+                is_valid_question = True
             if is_valid_question:
                 context = [text_chunk] * len(question.split("\n"))
                 is_conv = len(context) > 1
@@ -354,7 +372,10 @@ class QAGenerationChainV2(Chain):
         llm: BaseLanguageModel,
         k: Optional[int] = None,
         chunk_size: int = 512,
-
+        filter_lowquality_context: bool = False,
+        filter_lowquality_question: bool = False,
+        question_prompt: Optional[ChatPromptTemplate] = SEED_QUESTION_CHAT_PROMPT,
+        answer_prompt: Optional[HumanMessagePromptTemplate] = ANSWER_FORMULATE,
         **kwargs: Any,
     ) -> QAGenerationChainV2:
         """
@@ -362,13 +383,21 @@ class QAGenerationChainV2(Chain):

         Args:
             llm: a language model
-
+            question_prompt: a prompt template for generate question
+            answer_prompt: a prompt template for generate answer
             **kwargs: additional arguments

         Returns:
             a QAGenerationChain class
         """
-        generator = TrainsetGenerator.from_default(
+        generator = TrainsetGenerator.from_default(
+            llm,
+            chunk_size=chunk_size,
+            prompt=question_prompt,
+            answer_prompt=answer_prompt,
+            filter_lowquality_context=filter_lowquality_context,
+            filter_lowquality_question=filter_lowquality_question
+        )
         return cls(documents=documents, generator=generator, k=k, **kwargs)

     @property
@@ -395,13 +424,14 @@ class QAGenerationChainV2(Chain):
         dataset = self.generator.generate(documents=self.documents, train_size=self.k)
         df = dataset.to_pandas()
         qa_pairs = df.to_dict("records")
-        qa =
+        qa = []
         for pair in qa_pairs:
-            qa
-
-
-
-
+            qa.append({
+                "question": pair["question"],
+                "answer": pair["ground_truth"][0],
+                "context": pair["ground_truth_context"][0],
+            })
+        qa = f'```json\n{json.dumps(qa, ensure_ascii=False, indent=4)}\n```'
         return {self.output_key: qa}

     async def _acall(
````
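Net effect of these hunks: `TrainsetGenerator` and `QAGenerationChainV2.from_llm` gain opt-in low-quality filters and pluggable question/answer prompts, and the chain now emits its QA pairs as a fenced JSON string. A minimal sketch of the new surface, assuming `docs` (pre-loaded documents) and `llm` already exist; the parameter names are the ones introduced above:

```python
# Sketch only: exercises the parameters added in this release.
from bisheng_langchain.chains.qa_generation.base_v2 import QAGenerationChainV2

chain = QAGenerationChainV2.from_llm(
    documents=docs,                    # pre-loaded documents (assumed)
    llm=llm,                           # any LangChain BaseLanguageModel
    k=10,                              # number of QA pairs to sample
    chunk_size=512,
    filter_lowquality_context=True,    # score each chunk with the critic LLM, skip low scores
    filter_lowquality_question=True,   # validate generated questions before keeping them
)
# The chain's output value is now a fenced-JSON string of
# {"question", "answer", "context"} records.
```

Both filters default to `False`, so existing callers keep the old, unfiltered behavior.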
bisheng_langchain/chat_models/proxy_llm.py

```diff
@@ -276,7 +276,7 @@ class ProxyChatLLM(BaseChatModel):
         function_call: Optional[dict] = None
         async for stream_resp in self.acompletion_with_retry(messages=message_dicts, **params):

-            role = stream_resp['choices'][0]['delta'].get('role'
+            role = stream_resp['choices'][0]['delta'].get('role') or role
             token = stream_resp['choices'][0]['delta'].get('content', '')
             inner_completion += token or ''
             _function_call = stream_resp['choices'][0]['delta'].get('function_call')
```
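The one-line fix matters for streaming: OpenAI-style deltas carry `role` only in the first chunk, so without the `or role` fallback the role would be reset on every subsequent chunk. A standalone illustration of the pattern (not the bisheng code itself):

```python
# Illustration of the `get('role') or role` fallback.
deltas = [
    {'role': 'assistant', 'content': 'Hel'},  # first chunk carries the role
    {'content': 'lo'},                        # later chunks omit it
]
role = None
for delta in deltas:
    role = delta.get('role') or role  # keep the last non-empty role
assert role == 'assistant'
```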
bisheng_langchain/document_loaders/elem_unstrcutured_loader.py

```diff
@@ -87,7 +87,8 @@ class ElemUnstructuredLoader(BasePDFLoader):

         resp = requests.post(self.unstructured_api_url, headers=self.headers, json=payload)
         if resp.status_code != 200:
-            raise Exception(
+            raise Exception(
+                f'file partition {os.path.basename(self.file_name)} failed resp={resp.text}')

         resp = resp.json()
         if 200 != resp.get('status_code'):
@@ -97,6 +98,10 @@ class ElemUnstructuredLoader(BasePDFLoader):
             logger.info(f'partition_error resp={resp}')
         logger.info(f'unstruct_return code={resp.get("status_code")}')

+        if resp.get('b64_pdf'):
+            with open(self.file_path, 'wb') as f:
+                f.write(base64.b64decode(resp['b64_pdf']))
+
         content, metadata = merge_partitions(partitions)
         metadata['source'] = self.file_name

@@ -144,27 +149,33 @@ class ElemUnstructuredLoaderV0(BasePDFLoader):
             return res['text'], {'source': self.file_name}
         # the file failed to parse; for PDF files, raise the error directly
         if self.file_name.endswith('.pdf'):
-            raise Exception(
+            raise Exception(
+                f'file text {os.path.basename(self.file_name)} failed resp={resp.text}')
         # for non-PDF files, convert the file to PDF first, then parse it in partition mode
         # convert the file to PDF
-        resp = requests.post(self.unstructured_api_url,
-
-
-
-
+        resp = requests.post(self.unstructured_api_url,
+                             headers=self.headers,
+                             json={
+                                 'filename': os.path.basename(self.file_name),
+                                 'b64_data': [b64_data],
+                                 'mode': 'topdf',
+                             })
         if resp.status_code != 200 or resp.json().get('status_code') != 200:
-            raise Exception(
+            raise Exception(
+                f'file topdf {os.path.basename(self.file_name)} failed resp={resp.text}')
         # parse the PDF file
         payload['mode'] = 'partition'
         payload['b64_data'] = [resp.json()['b64_pdf']]
         payload['filename'] = os.path.basename(self.file_name) + '.pdf'
         resp = requests.post(self.unstructured_api_url, headers=self.headers, json=payload)
         if resp.status_code != 200 or resp.json().get('status_code') != 200:
-            raise Exception(
+            raise Exception(
+                f'file partition {os.path.basename(self.file_name)} failed resp={resp.text}')
         res = resp.json()
         partitions = res['partitions']
         if not partitions:
-            raise Exception(
+            raise Exception(
+                f'file partition empty {os.path.basename(self.file_name)} resp={resp.text}')
         # concatenate the results into text
         content, _ = merge_partitions(partitions)
         return content, {'source': self.file_name}
```
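For non-PDF inputs, `ElemUnstructuredLoaderV0` now makes two calls against the same endpoint: a `topdf` conversion, then a `partition` pass over the returned PDF. A compact sketch of that flow, with the payload fields taken from the diff and everything else (URL, headers, error handling) assumed:

```python
import base64
import os

import requests


def parse_via_topdf(api_url: str, file_path: str, headers: dict) -> list:
    """Convert a non-PDF file to PDF, then partition it (sketch, not library code)."""
    name = os.path.basename(file_path)
    with open(file_path, 'rb') as f:
        b64_data = base64.b64encode(f.read()).decode()

    # Step 1: ask the service to convert the file to PDF.
    resp = requests.post(api_url, headers=headers,
                         json={'filename': name, 'b64_data': [b64_data], 'mode': 'topdf'})
    if resp.status_code != 200 or resp.json().get('status_code') != 200:
        raise Exception(f'file topdf {name} failed resp={resp.text}')

    # Step 2: partition the converted PDF returned as b64_pdf.
    payload = {'filename': name + '.pdf',
               'b64_data': [resp.json()['b64_pdf']],
               'mode': 'partition'}
    return requests.post(api_url, headers=headers, json=payload).json()['partitions']
```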
bisheng_langchain/gpts/assistant.py

```diff
@@ -131,7 +131,7 @@ if __name__ == "__main__":

     # set_debug(True)
     # chat_history = []
-    # query = "
+    # query = "Analyze today's market performance"
     chat_history = ['Hello', 'Hello, how can I help you?', 'What is the share price of Furong Technology?', 'The current share price of Furong Technology (stock code: 300049) is 48.67 yuan.']
     query = 'What is the date today? What was the share price at this time last year?'
     bisheng_assistant = BishengAssistant("config/base_scene.yaml")
```
bisheng_langchain/gpts/prompts/assistant_prompt_base.py

```diff
@@ -1 +1,19 @@
-ASSISTANT_PROMPT_DEFAULT = "You are a helpful assistant."
+ASSISTANT_PROMPT_DEFAULT = "You are a helpful assistant."
+
+# ASSISTANT_PROMPT_DEFAULT = """
+# # role
+# You are a stock market researcher.
+
+# # skill 1: stock market analysis
+# When the user asks for an analysis of the day's market, present the day's stock market in a table:
+# - rows: CSI 300, CSI 500, CSI 1000, and a subtotal
+# - column metrics: daily change, year-to-date change, turnover (unit: 100 million yuan), turnover share
+# - subtotal note: sum the "turnover" column; the other columns stay empty
+# - metric definitions: daily change = the change in the real-time quote data; year-to-date change = (current price in the real-time quote data - opening price on the first trading day of this year in the historical quote data) / opening price on the first trading day of this year in the historical quote data * 100%; turnover = the turnover in the real-time quote data; turnover share = the index's percentage of the combined turnover of all indices.
+
+# To complete the tasks above, you can call the following tools:
+# 1. real-time stock quote tool: get the day's current price, change, and turnover
+# 2. historical stock quote tool: get the opening price on a given historical date
+
+# Appendix, index codes: CSI 300 (000300), CSI 500 (000905), CSI 1000 (000852)
+# """
```
bisheng_langchain/gpts/prompts/react_agent_prompt.py

````diff
@@ -23,10 +23,10 @@ Valid "action" values: "Final Answer" or {tool_names}
 Provide only ONE action per $JSON_BLOB, as shown:

 ```
-{{
+{{
   "action": $TOOL_NAME,
   "action_input": $INPUT
-}}
+}}
 ```

 Follow this format:
@@ -42,10 +42,10 @@ Observation: action result
 Thought: I know what to respond
 Action:
 ```
-{{
+{{
   "action": "Final Answer",
   "action_input": "Final response to human"
-}}
+}}
 ```

 Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation
 """
````
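The doubled braces in this prompt are not cosmetic: LangChain templates use Python `str.format` semantics, so a single `{` would be parsed as a variable while `{{` and `}}` render as literal braces. A quick standalone check:

```python
# Literal braces must be doubled in format-style prompt templates.
from langchain_core.prompts import PromptTemplate

template = PromptTemplate.from_template('Action:\n{{\n  "action": {tool_name}\n}}')
print(template.format(tool_name='"Final Answer"'))
# Action:
# {
#   "action": "Final Answer"
# }
```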
bisheng_langchain/rag/init_retrievers/keyword_retriever.py

```diff
@@ -16,7 +16,7 @@ from langchain.text_splitter import TextSplitter


 class KeywordRetriever(BaseRetriever):
-    keyword_store:
+    keyword_store: VectorStore
     text_splitter: TextSplitter
     search_type: str = 'similarity'
     search_kwargs: dict = Field(default_factory=dict)
```
bisheng_langchain/utils/azure_dalle_image_generator.py (new file)

```diff
@@ -0,0 +1,140 @@
+import os
+from typing import Callable, Dict, Optional, Union
+
+import openai
+from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
+from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
+from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
+
+
+class AzureDallEWrapper(DallEAPIWrapper):
+    """`Azure OpenAI` Embeddings API.
+
+    To use, you should have the
+    environment variable ``AZURE_OPENAI_API_KEY`` set with your API key or pass it
+    as a named parameter to the constructor.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_openai import AzureOpenAIEmbeddings
+
+            openai = AzureOpenAIEmbeddings(model="text-embedding-3-large")
+    """
+
+    azure_endpoint: Union[str, None] = None
+    """Your Azure endpoint, including the resource.
+
+    Automatically inferred from env var `AZURE_OPENAI_ENDPOINT` if not provided.
+
+    Example: `https://example-resource.azure.openai.com/`
+    """
+    deployment: Optional[str] = Field(default=None, alias='azure_deployment')
+    """A model deployment.
+
+    If given sets the base client URL to include `/deployments/{azure_deployment}`.
+    Note: this means you won't be able to use non-deployment endpoints.
+    """
+    openai_api_key: Optional[SecretStr] = Field(default=None, alias='api_key')
+    """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided."""
+    azure_ad_token: Optional[SecretStr] = None
+    """Your Azure Active Directory token.
+
+    Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided.
+
+    For more:
+    https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id.
+    """
+    azure_ad_token_provider: Union[Callable[[], str], None] = None
+    """A function that returns an Azure Active Directory token.
+
+    Will be invoked on every request.
+    """
+    openai_api_version: Optional[str] = Field(default=None, alias='api_version')
+    """Automatically inferred from env var `OPENAI_API_VERSION` if not provided."""
+    validate_base_url: bool = True
+    chunk_size: int = 2048
+    """Maximum number of texts to embed in each batch"""
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        # Check OPENAI_KEY for backwards compatibility.
+        # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using
+        # other forms of azure credentials.
+        openai_api_key = (values['openai_api_key'] or os.getenv('AZURE_OPENAI_API_KEY')
+                          or os.getenv('OPENAI_API_KEY'))
+        values['openai_api_key'] = (convert_to_secret_str(openai_api_key)
+                                    if openai_api_key else None)
+        values['openai_api_base'] = (values['openai_api_base'] if 'openai_api_base' in values else
+                                     os.getenv('OPENAI_API_BASE'))
+        values['openai_api_version'] = values['openai_api_version'] or os.getenv(
+            'OPENAI_API_VERSION', default='2023-05-15')
+        values['openai_api_type'] = get_from_dict_or_env(values,
+                                                         'openai_api_type',
+                                                         'OPENAI_API_TYPE',
+                                                         default='azure')
+        values['openai_organization'] = (values['openai_organization']
+                                         or os.getenv('OPENAI_ORG_ID')
+                                         or os.getenv('OPENAI_ORGANIZATION'))
+        values['openai_proxy'] = get_from_dict_or_env(values,
+                                                      'openai_proxy',
+                                                      'OPENAI_PROXY',
+                                                      default='')
+        values['azure_endpoint'] = values['azure_endpoint'] or os.getenv('AZURE_OPENAI_ENDPOINT')
+        azure_ad_token = values['azure_ad_token'] or os.getenv('AZURE_OPENAI_AD_TOKEN')
+        values['azure_ad_token'] = (convert_to_secret_str(azure_ad_token)
+                                    if azure_ad_token else None)
+        # For backwards compatibility. Before openai v1, no distinction was made
+        # between azure_endpoint and base_url (openai_api_base).
+        openai_api_base = values['openai_api_base']
+        if openai_api_base and values['validate_base_url']:
+            if '/openai' not in openai_api_base:
+                values['openai_api_base'] += '/openai'
+                raise ValueError('As of openai>=1.0.0, Azure endpoints should be specified via '
+                                 'the `azure_endpoint` param not `openai_api_base` '
+                                 '(or alias `base_url`). ')
+            if values['deployment']:
+                raise ValueError('As of openai>=1.0.0, if `deployment` (or alias '
+                                 '`azure_deployment`) is specified then '
+                                 '`openai_api_base` (or alias `base_url`) should not be. '
+                                 'Instead use `deployment` (or alias `azure_deployment`) '
+                                 'and `azure_endpoint`.')
+        client_params = {
+            'api_version':
+                values['openai_api_version'],
+            'azure_endpoint':
+                values['azure_endpoint'],
+            'azure_deployment':
+                values['deployment'],
+            'api_key':
+                (values['openai_api_key'].get_secret_value() if values['openai_api_key'] else None),
+            'azure_ad_token':
+                (values['azure_ad_token'].get_secret_value() if values['azure_ad_token'] else None),
+            'azure_ad_token_provider':
+                values['azure_ad_token_provider'],
+            'organization':
+                values['openai_organization'],
+            'base_url':
+                values['openai_api_base'],
+            'timeout':
+                values['request_timeout'],
+            'max_retries':
+                values['max_retries'],
+            'default_headers':
+                values['default_headers'],
+            'default_query':
+                values['default_query'],
+        }
+        if not values.get('client'):
+            sync_specific = {'http_client': values['http_client']}
+            values['client'] = openai.AzureOpenAI(**client_params, **sync_specific).images
+        if not values.get('async_client'):
+            async_specific = {'http_client': values['http_async_client']}
+            values['async_client'] = openai.AsyncAzureOpenAI(**client_params,
+                                                             **async_specific).images
+        return values
+
+    @property
+    def _llm_type(self) -> str:
+        return 'azure-openai-chat'
```
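Since `AzureDallEWrapper` subclasses `DallEAPIWrapper`, it should be drop-in for image generation once the Azure credentials resolve. A usage sketch under assumed endpoint and deployment names:

```python
# Usage sketch; endpoint, deployment name, and key are placeholders.
from bisheng_langchain.utils.azure_dalle_image_generator import AzureDallEWrapper

dalle = AzureDallEWrapper(
    azure_endpoint='https://example-resource.openai.azure.com/',  # or env AZURE_OPENAI_ENDPOINT
    azure_deployment='dall-e-3',      # alias for the `deployment` field
    api_key='...',                    # or env AZURE_OPENAI_API_KEY
    api_version='2023-05-15',         # the validator's default
)
image_url = dalle.run('a watercolor fox reading a newspaper')  # inherited from DallEAPIWrapper
```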
bisheng_langchain/vectorstores/milvus.py

```diff
@@ -105,6 +105,7 @@ class Milvus(MilvusLangchain):
         index_params: Optional[dict] = None,
         search_params: Optional[dict] = None,
         drop_old: Optional[bool] = False,
+        partition_key: Optional[str] = None,
         *,
         primary_field: str = 'pk',
         text_field: str = 'text',
@@ -195,6 +196,7 @@ class Milvus(MilvusLangchain):
         self._vector_field = vector_field
         # partion key for multi-tenancy
         self._partition_field = partition_field
+        self.partition_key = partition_key

         self.fields: list[str] = []
         # Create the connection to the server
@@ -438,13 +440,13 @@ class Milvus(MilvusLangchain):
         self.col.load()

     def add_texts(
-
-
-
-
-
-
-
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        timeout: Optional[int] = None,
+        batch_size: int = 1000,
+        no_embedding: bool = False,
+        **kwargs: Any,
     ) -> List[str]:
         """Insert text data into Milvus.

@@ -529,13 +531,13 @@ class Milvus(MilvusLangchain):
         return pks

     def similarity_search(
-
-
-
-
-
-
-
+        self,
+        query: str,
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
     ) -> List[Document]:
         """Perform a similarity search against the query string.

@@ -567,13 +569,13 @@ class Milvus(MilvusLangchain):
         return [doc for doc, _ in res]

     def similarity_search_by_vector(
-
-
-
-
-
-
-
+        self,
+        embedding: List[float],
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
     ) -> List[Document]:
         """Perform a similarity search against the query string.

@@ -605,13 +607,13 @@ class Milvus(MilvusLangchain):
         return [doc for doc, _ in res]

     def similarity_search_with_score(
-
-
-
-
-
-
-
+        self,
+        query: str,
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """Perform a search on a query string and return results with score.

@@ -651,13 +653,13 @@ class Milvus(MilvusLangchain):
         return res

     def similarity_search_with_score_by_vector(
-
-
-
-
-
-
-
+        self,
+        embedding: List[float],
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """Perform a search on a query string and return results with score.

@@ -692,12 +694,12 @@ class Milvus(MilvusLangchain):
         output_fields = self.fields[:]
         output_fields.remove(self._vector_field)
         # partition for multi-tenancy
-        if
+        if self.partition_key:
             # add parttion
             if expr:
-                expr = f"{expr} and {self._partition_field}==\"{
+                expr = f"{expr} and {self._partition_field}==\"{self.partition_key}\""
             else:
-                expr = f"{self._partition_field}==\"{
+                expr = f"{self._partition_field}==\"{self.partition_key}\""

         # Perform the search.
         res = self.col.search(
@@ -721,15 +723,15 @@ class Milvus(MilvusLangchain):
         return ret

     def max_marginal_relevance_search(
-
-
-
-
-
-
-
-
-
+        self,
+        query: str,
+        k: int = 4,
+        fetch_k: int = 20,
+        lambda_mult: float = 0.5,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
     ) -> List[Document]:
         """Perform a search and return results that are reordered by MMR.

@@ -774,15 +776,15 @@ class Milvus(MilvusLangchain):
         )

     def max_marginal_relevance_search_by_vector(
-
-
-
-
-
-
-
-
-
+        self,
+        embedding: list[float],
+        k: int = 4,
+        fetch_k: int = 20,
+        lambda_mult: float = 0.5,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
     ) -> List[Document]:
         """Perform a search and return results that are reordered by MMR.

@@ -869,18 +871,18 @@ class Milvus(MilvusLangchain):

     @classmethod
     def from_texts(
-
-
-
-
-
-
-
-
-
-
-
-
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection_name: str = 'LangChainCollection',
+        connection_args: dict[str, Any] = DEFAULT_MILVUS_CONNECTION,
+        consistency_level: str = 'Session',
+        index_params: Optional[dict] = None,
+        search_params: Optional[dict] = None,
+        drop_old: bool = False,
+        no_embedding: bool = False,
+        **kwargs: Any,
     ) -> Milvus:
         """Create a Milvus collection, indexes it with HNSW, and insert data.

```
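The `partition_key` knob completes the multi-tenancy support: when set, every search ANDs `partition_field == partition_key` into the caller's `expr`. A sketch of the intended use, with the import path and surrounding arguments assumed from the package layout:

```python
# Sketch of per-tenant filtering; `embeddings` is assumed to exist.
from bisheng_langchain.vectorstores import Milvus

store = Milvus(
    embedding_function=embeddings,
    collection_name='kb_docs',
    partition_field='tenant_id',   # existing multi-tenancy field
    partition_key='tenant_42',     # new: value injected into every search expr
)
docs = store.similarity_search('contract terms', k=4)
# effective filter: tenant_id == "tenant_42" (ANDed with any caller-supplied expr)
```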
{bisheng_langchain-0.3.3b0.dist-info → bisheng_langchain-0.3.3.dev2.dist-info}/RECORD

```diff
@@ -23,7 +23,7 @@ bisheng_langchain/chains/conversational_retrieval/__init__.py,sha256=47DEQpj8HBS
 bisheng_langchain/chains/conversational_retrieval/base.py,sha256=XiqBqov6No-wTVCou6qyMT5p2JQgoQI7OLQOYH8XUos,5313
 bisheng_langchain/chains/qa_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/qa_generation/base.py,sha256=VYGmLDB0bnlDQ6T8ivLP55wwFbMo9HOzlPEDUuRx5fU,4148
-bisheng_langchain/chains/qa_generation/base_v2.py,sha256=
+bisheng_langchain/chains/qa_generation/base_v2.py,sha256=2F2kGe3ermJraQu4oC-m8vm_ENBy_Zi4uHrJDcSOeJw,15460
 bisheng_langchain/chains/qa_generation/prompt.py,sha256=4eJk9aDUYDN1qaaYRPy9EobCIncnwS8BbQaDFzzePtM,1944
 bisheng_langchain/chains/qa_generation/prompt_v2.py,sha256=sQLanA_iOnLqrUIwzfTOTANt-1vJ44CM54HFDU8Jo1Q,8938
 bisheng_langchain/chains/question_answering/__init__.py,sha256=_gOZMc-SWprK6xc-Jj64jcr9nc-G4YkZbEYwfJNq_bY,8795
@@ -35,7 +35,7 @@ bisheng_langchain/chains/router/rule_router.py,sha256=R2YRUnwn7s_7DbsSn27uPn4cIV
 bisheng_langchain/chat_models/__init__.py,sha256=4-HTLE_SXO4hmNJu6yQxiQKBt2IFca_ezllVBLmvbEE,635
 bisheng_langchain/chat_models/host_llm.py,sha256=35_jTdUm85mk-t2MARZYGC8dIPVtf5XXlGfFE6hQ1Gc,23153
 bisheng_langchain/chat_models/minimax.py,sha256=JLs_f6vWD9beZYUtjD4FG28G8tZHrGUAWOwdLIuJomw,13901
-bisheng_langchain/chat_models/proxy_llm.py,sha256=
+bisheng_langchain/chat_models/proxy_llm.py,sha256=3ZCWU4n4eEMQcWiZt1BgD6DuVlbW8IyZSWGPsRunQco,17084
 bisheng_langchain/chat_models/qwen.py,sha256=W73KxDRQBUZEzttEM4K7ZzPqbN-82O6YQmpX-HB_wZU,19971
 bisheng_langchain/chat_models/sensetime.py,sha256=fuQ5yYGO5F7o7iQ7us17MlL4TAWRRFCCpNN9bAF-ydc,17056
 bisheng_langchain/chat_models/wenxin.py,sha256=OBXmFWkUWZMu1lUz6hPAEawsbAcdgMWcm9WkJJLZyng,13671
@@ -54,7 +54,7 @@ bisheng_langchain/document_loaders/custom_kv.py,sha256=xWUPhcr1hjbdya4zgEHG4Fl0s
 bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_pdf.py,sha256=K-TXILGNFLFjavhun_MFbUF4t2_WGA3Z-kbnr75lmW8,22243
-bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=
+bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=FtoyfmE85CwZuMvr52_bqcHQCgypKCWMGwLZrzgQYbY,7353
 bisheng_langchain/document_loaders/universal_kv.py,sha256=ZdIgFIc2fH2kkvJNb7j2wi6FLS_PaaatVy6z_YNV2hw,4114
 bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
 bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=Y_CRYwBr-gFArOirF1b76KyI5N8eVpsLeDiIsKtYkpU,1641
@@ -70,7 +70,7 @@ bisheng_langchain/embeddings/interface/__init__.py,sha256=GNY3tibpRxpAdAfSvQmXBK
 bisheng_langchain/embeddings/interface/types.py,sha256=VdurbtsnjCPdlOjPFcK2Mg6r9bJYYHb3tepvkk-y3nM,461
 bisheng_langchain/embeddings/interface/wenxin.py,sha256=5d9gI4enmfkD80s0FHKiDt33O0mwM8Xc5WTubnMUy8c,3104
 bisheng_langchain/gpts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bisheng_langchain/gpts/assistant.py,sha256=
+bisheng_langchain/gpts/assistant.py,sha256=jPGVjPhqx-z4nvEHVsprWLqAVbO99Uus_GADAPASXQE,5438
 bisheng_langchain/gpts/auto_optimization.py,sha256=WNsC19rgvuDYQlSIaYThq5RqCbuobDbzCwAJW4Ksw0c,3626
 bisheng_langchain/gpts/auto_tool_selected.py,sha256=21WETf9o0YS-QEBwv3mmZRObKWszefQkXEqAA6KzoaM,1582
 bisheng_langchain/gpts/load_tools.py,sha256=LiiK1OqFu7Ki-F_Rhfi1rgp0wBQCSrTDdqsgwciTOIU,8099
@@ -80,12 +80,12 @@ bisheng_langchain/gpts/agent_types/__init__.py,sha256=88tFt1GfrfIqa4hCg0cMJk7rTe
 bisheng_langchain/gpts/agent_types/llm_functions_agent.py,sha256=IXg5u8dSk-FcLvjrvvLcN5revGccXylXkD73ZWhaDWs,8715
 bisheng_langchain/gpts/agent_types/llm_react_agent.py,sha256=W6IJMwoFUj_a2cXZ_nXOpzaHBjP5IBrFxRTSejNVi9A,6678
 bisheng_langchain/gpts/prompts/__init__.py,sha256=pOnXvk6_PjqAoLrh68sI9o3o6znKGxoLMVFP-0XTCJo,704
-bisheng_langchain/gpts/prompts/assistant_prompt_base.py,sha256=
+bisheng_langchain/gpts/prompts/assistant_prompt_base.py,sha256=Yp9M1XbZb5jHeBG_txcwWA84Euvl89t0g-GbJMa5Ur0,1133
 bisheng_langchain/gpts/prompts/assistant_prompt_cohere.py,sha256=GLQ77oXqSlE7Xes2ObsFsNon5nOJOCRhQOKE5bUpgaI,2421
 bisheng_langchain/gpts/prompts/assistant_prompt_opt.py,sha256=TZsRK4XPMrUhGg0PoMyiE3wE-aG34UmlVflkCl_c0QI,4151
 bisheng_langchain/gpts/prompts/breif_description_prompt.py,sha256=w4A5et0jB-GkxEMQBp4i6GKX3RkVeu7NzWEjOZZAicM,5336
 bisheng_langchain/gpts/prompts/opening_dialog_prompt.py,sha256=VVF0JLHtetupVB0kabiFHWDHlQaa4nFLcbYXgIBA3nw,5965
-bisheng_langchain/gpts/prompts/react_agent_prompt.py,sha256=
+bisheng_langchain/gpts/prompts/react_agent_prompt.py,sha256=MA5FReipAYfe6ypOvg_SJSlHxsjNdCh1HFG22axjFLo,1968
 bisheng_langchain/gpts/prompts/select_tools_prompt.py,sha256=AyvVnrLEsQy7RHuGTPkcrMUxgA98Q0TzF-xweoc7GyY,1400
 bisheng_langchain/gpts/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/api_tools/__init__.py,sha256=CkEjgIFM4GIv86V1B7SsFLaB6M86c54QuO8wIRizUZ8,1608
@@ -126,7 +126,7 @@ bisheng_langchain/rag/config/baseline_s2b_mix.yaml,sha256=rkPfzU2-mvjRrZ0zMHaQsn
 bisheng_langchain/rag/config/baseline_v2.yaml,sha256=RP-DwIRIS_ZK8ixbXi2Z28rKqHD56pWmr2o2WWIwq3Y,2382
 bisheng_langchain/rag/init_retrievers/__init__.py,sha256=qpLLAuqZPtumTlJj17Ie5AbDDmiUiDxYefg_pumqu-c,218
 bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py,sha256=oRKZZpxlLQAtsubIcAXeXpf1a9h6Pt6uOtNTLeD2jps,2362
-bisheng_langchain/rag/init_retrievers/keyword_retriever.py,sha256=
+bisheng_langchain/rag/init_retrievers/keyword_retriever.py,sha256=Da4Q5BrfN0GckJaeAgPYMlzQAp9ll7ZGGyvs7OdCQ5c,2513
 bisheng_langchain/rag/init_retrievers/mix_retriever.py,sha256=Whxq4kjNPLsxnHcVo60usdFFwLTCD-1jO38q08LXkVQ,4653
 bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py,sha256=RQ7QLEOOhBrkw-EimXVJqIGa96D-KkNDik2h9hzg9fU,3805
 bisheng_langchain/rag/prompts/__init__.py,sha256=IUCq9gzqGQN_6IDk0D_F5t3mOUI_KbmSzYnnXoX4VKE,223
@@ -147,12 +147,13 @@ bisheng_langchain/retrievers/mix_es_vector.py,sha256=dSrrsuMPSgGiu181EOzACyIKiDX
 bisheng_langchain/sql/__init__.py,sha256=2arRtNQ-kUvIsy_8v_PrLxf5r9W-S7mbqptG_l4_1RE,88
 bisheng_langchain/sql/base.py,sha256=WNHCy16UoxvDbroHnJq8CsZ9ot4NGflCm8Bgiv45kks,6152
 bisheng_langchain/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bisheng_langchain/utils/azure_dalle_image_generator.py,sha256=wbOIrVX8lN8zZni0GjUpwKBESy0TmlqrGLZCY5PbUsM,6819
 bisheng_langchain/utils/requests.py,sha256=vWGKyNTxApVeaVdKxqACfIT1Q8wMy-jC3kUv2Ce9Mzc,8688
 bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHnqrsjx700Fy11M,213
 bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=Pm1rS50GJ0HWbjBsFDgs28SVuVbjGSRPOor6yJlnE7w,13347
-bisheng_langchain/vectorstores/milvus.py,sha256=
+bisheng_langchain/vectorstores/milvus.py,sha256=8HHbIxoSbLYDFlFJSfmjLOfqGpOSZd24iVYWSYz3TX0,36637
 bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
-bisheng_langchain-0.3.
-bisheng_langchain-0.3.
-bisheng_langchain-0.3.
-bisheng_langchain-0.3.
+bisheng_langchain-0.3.3.dev2.dist-info/METADATA,sha256=9JMcguR6d_cAwkNX3ynYJi55rnk6PjZSqfUIXH9gnG4,2489
+bisheng_langchain-0.3.3.dev2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+bisheng_langchain-0.3.3.dev2.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+bisheng_langchain-0.3.3.dev2.dist-info/RECORD,,
```
{bisheng_langchain-0.3.3b0.dist-info → bisheng_langchain-0.3.3.dev2.dist-info}/WHEEL
RENAMED: file without changes

{bisheng_langchain-0.3.3b0.dist-info → bisheng_langchain-0.3.3.dev2.dist-info}/top_level.txt
RENAMED: file without changes