bisheng-langchain 0.3.0rc0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bisheng_langchain/chat_models/host_llm.py +1 -1
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +5 -3
- bisheng_langchain/gpts/agent_types/llm_functions_agent.py +7 -1
- bisheng_langchain/gpts/assistant.py +8 -5
- bisheng_langchain/gpts/auto_optimization.py +28 -27
- bisheng_langchain/gpts/auto_tool_selected.py +14 -15
- bisheng_langchain/gpts/load_tools.py +53 -1
- bisheng_langchain/gpts/prompts/__init__.py +4 -2
- bisheng_langchain/gpts/prompts/assistant_prompt_base.py +1 -0
- bisheng_langchain/gpts/prompts/assistant_prompt_cohere.py +19 -0
- bisheng_langchain/gpts/prompts/opening_dialog_prompt.py +1 -1
- bisheng_langchain/gpts/tools/api_tools/__init__.py +1 -1
- bisheng_langchain/gpts/tools/api_tools/base.py +3 -3
- bisheng_langchain/gpts/tools/api_tools/flow.py +19 -7
- bisheng_langchain/gpts/tools/api_tools/macro_data.py +175 -4
- bisheng_langchain/gpts/tools/api_tools/openapi.py +101 -0
- bisheng_langchain/gpts/tools/api_tools/sina.py +2 -2
- bisheng_langchain/gpts/tools/code_interpreter/tool.py +118 -39
- bisheng_langchain/rag/__init__.py +5 -0
- bisheng_langchain/rag/bisheng_rag_pipeline.py +320 -0
- bisheng_langchain/rag/bisheng_rag_pipeline_v2.py +359 -0
- bisheng_langchain/rag/bisheng_rag_pipeline_v2_cohere_raw_prompting.py +376 -0
- bisheng_langchain/rag/bisheng_rag_tool.py +288 -0
- bisheng_langchain/rag/config/baseline.yaml +86 -0
- bisheng_langchain/rag/config/baseline_caibao.yaml +82 -0
- bisheng_langchain/rag/config/baseline_caibao_knowledge_v2.yaml +110 -0
- bisheng_langchain/rag/config/baseline_caibao_v2.yaml +112 -0
- bisheng_langchain/rag/config/baseline_demo_v2.yaml +92 -0
- bisheng_langchain/rag/config/baseline_s2b_mix.yaml +88 -0
- bisheng_langchain/rag/config/baseline_v2.yaml +90 -0
- bisheng_langchain/rag/extract_info.py +38 -0
- bisheng_langchain/rag/init_retrievers/__init__.py +4 -0
- bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py +61 -0
- bisheng_langchain/rag/init_retrievers/keyword_retriever.py +65 -0
- bisheng_langchain/rag/init_retrievers/mix_retriever.py +103 -0
- bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py +92 -0
- bisheng_langchain/rag/prompts/__init__.py +9 -0
- bisheng_langchain/rag/prompts/extract_key_prompt.py +34 -0
- bisheng_langchain/rag/prompts/prompt.py +47 -0
- bisheng_langchain/rag/prompts/prompt_cohere.py +111 -0
- bisheng_langchain/rag/qa_corpus/__init__.py +0 -0
- bisheng_langchain/rag/qa_corpus/qa_generator.py +143 -0
- bisheng_langchain/rag/rerank/__init__.py +5 -0
- bisheng_langchain/rag/rerank/rerank.py +48 -0
- bisheng_langchain/rag/rerank/rerank_benchmark.py +139 -0
- bisheng_langchain/rag/run_qa_gen_web.py +47 -0
- bisheng_langchain/rag/run_rag_evaluate_web.py +55 -0
- bisheng_langchain/rag/scoring/__init__.py +0 -0
- bisheng_langchain/rag/scoring/llama_index_score.py +91 -0
- bisheng_langchain/rag/scoring/ragas_score.py +183 -0
- bisheng_langchain/rag/utils.py +181 -0
- bisheng_langchain/retrievers/ensemble.py +2 -1
- bisheng_langchain/vectorstores/elastic_keywords_search.py +2 -1
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/METADATA +1 -1
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/RECORD +57 -22
- bisheng_langchain/gpts/prompts/base_prompt.py +0 -1
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/WHEEL +0 -0
- {bisheng_langchain-0.3.0rc0.dist-info → bisheng_langchain-0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,183 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
from collections import defaultdict
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
8
|
+
|
9
|
+
import pandas as pd
|
10
|
+
from datasets import Dataset
|
11
|
+
from loguru import logger
|
12
|
+
from ragas import evaluate
|
13
|
+
from ragas.metrics import AnswerCorrectness, AnswerCorrectnessBisheng, AnswerRecallBisheng
|
14
|
+
|
15
|
+
|
16
|
+
@dataclass
class RagScore:
    """Score question/answer rows of an Excel sheet with ragas metrics.

    The sheet at ``excel_path`` is read, rows with missing values in the
    configured columns are dropped, every metric listed in ``metrics`` is run
    through the matching ``ragas_<metric>`` method, and the per-row scores
    (plus an optional per-query-type summary) are written to
    ``<save_path>/<excel stem>_score.xlsx``.
    """

    excel_path: str  # input workbook read with pandas.read_excel
    save_path: str  # directory that receives the "<stem>_score.xlsx" output
    question_column: str
    gt_column: str  # ground-truth answer column
    answer_column: str
    metrics: List[str]  # each entry must have a matching ``ragas_<name>`` method
    contexts_column: Optional[str] = None
    query_type_column: Optional[str] = None  # enables the grouped summary sheet
    gt_split_column: Optional[str] = None
    batch_size: int = 5
    whether_gtsplit: bool = False

    def _validate_metrics(self):
        """Raise if any requested metric has no ``ragas_<metric>`` method."""
        for metric in self.metrics:
            if not hasattr(self, f'ragas_{metric}'):
                raise Exception(f'"ragas_{metric}" 未实现!')

    def _run_metric(self, dataset: 'Dataset', metric: Any) -> pd.DataFrame:
        """Evaluate ``dataset`` with one ragas metric and return the result frame.

        Also records the result's keys in ``self.score_map_keys`` so ``score``
        knows which columns hold the metric values.
        """
        result = evaluate(dataset=dataset, metrics=[metric])
        self.score_map_keys = list(result.keys())
        return result.to_pandas()

    def ragas_answer_correctness(self, dataset: 'Dataset') -> pd.DataFrame:
        """Run ragas ``AnswerCorrectness`` on ``dataset``."""
        # Per the original author's note: only factual similarity is considered.
        weights = [1.0, 0.0]
        metric = AnswerCorrectness(weights=weights, batch_size=self.batch_size)
        return self._run_metric(dataset, metric)

    def ragas_answer_correctness_bisheng(self, dataset: 'Dataset') -> pd.DataFrame:
        """Run ``AnswerCorrectnessBisheng`` on ``dataset``."""
        metric = AnswerCorrectnessBisheng(batch_size=self.batch_size)
        return self._run_metric(dataset, metric)

    def ragas_answer_recall_bisheng(self, dataset: 'Dataset') -> pd.DataFrame:
        """Run ``AnswerRecallBisheng`` on ``dataset``."""
        metric = AnswerRecallBisheng(batch_size=self.batch_size,
                                     whether_gtsplit=self.whether_gtsplit)
        return self._run_metric(dataset, metric)

    def _remove_source(self, pred: str) -> str:
        """Strip citation markers such as 【1†source】 (only for openai assistant)."""
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        return re.sub(r'【(\d+)†source】', '', pred)

    def score(self) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """Score the sheet, save the results and return them.

        Returns:
            A tuple ``(df, grouped)``: ``df`` is the cleaned input frame with
            one extra column per metric key; ``grouped`` is the per-query-type
            summary of the last metric processed, or ``None`` when no grouping
            was produced.

        Raises:
            Exception: if a requested metric has no ``ragas_<metric>`` method.
        """
        df = pd.read_excel(self.excel_path)
        ori_row_nums = df.shape[0]

        # Drop rows that have NA in any of the configured columns.
        columns_to_check = [
            self.question_column,
            self.gt_column,
            self.answer_column,
            self.contexts_column,
            self.query_type_column,
        ]
        # Optional column holding the ground truth split into key points.
        if self.gt_split_column:
            columns_to_check.append(self.gt_split_column)

        df.dropna(subset=[col for col in columns_to_check if col], inplace=True)
        # Re-number rows so they line up with the 0..n-1 index of ragas results.
        df = df.reset_index()
        print(f'删除含有na的行 {ori_row_nums - df.shape[0]} 个!')
        print(f'总计 {df.shape[0]} 个问题')

        questions = df[self.question_column].tolist()
        answers = df[self.answer_column].tolist()
        # For openai assistant output, strip citations first:
        # answers = df[self.answer_column].apply(self._remove_source).tolist()
        ground_truths = df[self.gt_column].apply(lambda x: [x]).tolist()
        # TODO: contexts may be stored as JSON; this may need to change.
        if self.contexts_column:
            contexts = df[self.contexts_column].apply(lambda x: [x]).tolist()
        else:
            contexts = [['']] * len(questions)

        data: Dict[str, List[Any]] = {
            "question": questions,
            "answer": answers,
            "contexts": contexts,
            "ground_truths": ground_truths,
        }
        if self.gt_split_column:
            data['gt_split_point'] = df[self.gt_split_column].tolist()
        dataset = Dataset.from_dict(data)

        self._validate_metrics()

        save_group_df = dict()
        last_grouped: Optional[pd.DataFrame] = None
        for metric_name in self.metrics:
            ragas_result = getattr(self, f'ragas_{metric_name}')(dataset)
            if metric_name == 'answer_recall_bisheng':
                # Copy the extra columns this metric's result frame carries.
                split_target = self.gt_split_column if self.gt_split_column else "gt_split_point"
                df[split_target] = ragas_result["gt_split_point"]
                df["analyse"] = ragas_result["analyse"]

            # Copy each metric column from the result frame, aligned by row index.
            metric_columns = list(dict.fromkeys(self.score_map_keys))
            for metric in metric_columns:
                df[metric] = df.index.map(ragas_result[metric].to_dict())

            if self.query_type_column and self.query_type_column in df.columns:
                grouped_df = df.groupby(self.query_type_column)
                grouped_df = grouped_df.agg(
                    {self.question_column: 'count', **{metric: 'mean' for metric in metric_columns}})
                grouped_df.rename(columns={self.question_column: '问题个数'}, inplace=True)

                # Append a totals row: question count and overall mean per metric.
                total_question = grouped_df['问题个数'].sum()
                grouped_df.loc['总计', '问题个数'] = total_question
                for metric in metric_columns:
                    grouped_df.loc['总计', metric] = df[metric].sum() / total_question
                save_group_df[f'{metric_name}_group'] = grouped_df
                last_grouped = grouped_df

                # Kept inside the guard so grouped_df is always defined here.
                print(grouped_df.to_markdown())

        # Save: per-row scores on 'Sheet1', one extra sheet per grouped summary.
        output_path = Path(self.save_path) / f"{Path(self.excel_path).stem}_score.xlsx"
        with pd.ExcelWriter(output_path) as writer:
            df.to_excel(writer, sheet_name='Sheet1', index=False)
            for sheet_name, grouped_df in save_group_df.items():
                grouped_df.to_excel(writer, sheet_name=sheet_name, index=True)
        print(f'保存到 {output_path} 成功!')

        return df, last_grouped
|
167
|
+
|
168
|
+
|
169
|
+
if __name__ == '__main__':
    # Ad-hoc run against a local spreadsheet.
    # To score with 'answer_correctness_bisheng' instead, swap the entry in ``metrics``.
    rag_score = RagScore(
        excel_path='/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data/test.xlsx',
        save_path='/home/gulixin/workspace/llm/bisheng/src/bisheng-langchain/experimental/rag/data',
        question_column='问题',
        gt_column='GT',
        answer_column='rag_answer',
        query_type_column='问题类型',
        metrics=['answer_recall_bisheng'],
        batch_size=10,
        whether_gtsplit=False,
    )
    rag_score.score()
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# This module is used to import any langchain class by name.
|
2
|
+
|
3
|
+
import importlib
|
4
|
+
from typing import Any, Type
|
5
|
+
|
6
|
+
from langchain.agents import Agent
|
7
|
+
from langchain.base_language import BaseLanguageModel
|
8
|
+
from langchain.chains.base import Chain
|
9
|
+
from langchain.chat_models.base import BaseChatModel
|
10
|
+
from langchain.prompts import PromptTemplate
|
11
|
+
from langchain.tools import BaseTool
|
12
|
+
|
13
|
+
|
14
|
+
def import_module(module_path: str) -> Any:
    """Import a module, or a single attribute of a module.

    Two spellings are accepted:

    * a dotted module path such as ``'os.path'`` -- the module is returned;
    * a statement ``'from <module> import <name>'`` -- the attribute
      ``<name>`` of ``<module>`` is returned.

    The statement form is recognised by its leading ``from`` token, not by a
    substring search, so module paths that merely contain the letters "from"
    are imported normally.

    Raises:
        ValueError: if the text starts with ``from`` but does not consist of
            exactly four whitespace-separated tokens.
    """
    tokens = module_path.split()
    if tokens[:1] != ['from']:
        # Plain module path: import it as given.
        return importlib.import_module(module_path)

    if len(tokens) != 4:
        raise ValueError(
            f'Expected "from <module> import <name>", got: {module_path!r}')
    _, module_name, _, object_name = tokens

    module = importlib.import_module(module_name)
    return getattr(module, object_name)
|
26
|
+
|
27
|
+
|
28
|
+
def import_class(class_path: str) -> Any:
    """Resolve a dotted path: import everything before the last dot via
    ``import_module`` and return the attribute named after it."""
    parent_path, attr_name = class_path.rsplit('.', 1)
    return getattr(import_module(parent_path), attr_name)
|
33
|
+
|
34
|
+
|
35
|
+
def import_by_type(_type: str, name: str) -> Any:
    """Import ``name`` using the importer registered for category ``_type``.

    For ``_type == 'llms'`` the importer is chosen by name: names listed in
    ``bisheng_langchain.chat_models.__all__`` use the contribute importer,
    other names containing "chat" (case-insensitive) use the chat importer,
    and everything else uses the plain llm importer.

    Raises:
        ValueError: if ``_type`` is ``None``.
        KeyError: if ``_type`` is not a known category.
    """
    from bisheng_langchain import chat_models

    if _type is None:
        raise ValueError(f'Type cannot be None. Check if {name} is in the config file.')
    func_dict = {
        'agents': import_agent,
        'prompts': import_prompt,
        'llms': {
            'llm': import_llm,
            'chat': import_chat_llm,
            'contribute': import_chain_contribute_llm
        },
        'tools': import_tool,
        'chains': import_chain,
        'toolkits': import_toolkit,
        'memory': import_memory,
        'embeddings': import_embedding,
        'vectorstores': import_vectorstore,
        'documentloaders': import_documentloader,
        'textsplitters': import_textsplitter,
        'utilities': import_utility,
        'output_parsers': import_output_parser,
        'retrievers': import_retriever,
        'autogenRoles': import_autogenRoles,
        'inputOutput': import_inputoutput,
    }
    if _type == 'llms':
        # Same precedence as the original chained conditional expression.
        if name in chat_models.__all__:
            key = 'contribute'
        elif 'chat' in name.lower():
            key = 'chat'
        else:
            key = 'llm'
        loaded_func = func_dict[_type][key]  # type: ignore
    else:
        loaded_func = func_dict[_type]

    return loaded_func(name)
|
70
|
+
|
71
|
+
|
72
|
+
def import_inputoutput(input_output: str) -> Any:
    """Return the attribute ``input_output`` of ``bisheng_langchain.input_output``."""
    statement = f'from bisheng_langchain.input_output import {input_output}'
    return import_module(statement)
|
75
|
+
|
76
|
+
|
77
|
+
def import_output_parser(output_parser: str) -> Any:
    """Return the attribute ``output_parser`` of ``langchain.output_parsers``."""
    statement = f'from langchain.output_parsers import {output_parser}'
    return import_module(statement)
|
80
|
+
|
81
|
+
|
82
|
+
def import_chat_llm(llm: str) -> BaseChatModel:
    """Return the attribute ``llm`` of ``langchain.chat_models``."""
    dotted_path = f'langchain.chat_models.{llm}'
    return import_class(dotted_path)
|
85
|
+
|
86
|
+
|
87
|
+
def import_chain_contribute_llm(llm: str) -> BaseChatModel:
    """Return the attribute ``llm`` of ``bisheng_langchain.chat_models``."""
    dotted_path = f'bisheng_langchain.chat_models.{llm}'
    return import_class(dotted_path)
|
90
|
+
|
91
|
+
|
92
|
+
def import_retriever(retriever: str) -> Any:
    """Return the attribute ``retriever`` of ``langchain.retrievers``."""
    statement = f'from langchain.retrievers import {retriever}'
    return import_module(statement)
|
95
|
+
|
96
|
+
|
97
|
+
def import_autogenRoles(autogen: str) -> Any:
    """Return the attribute ``autogen`` of ``bisheng_langchain.autogen_role``."""
    statement = f'from bisheng_langchain.autogen_role import {autogen}'
    return import_module(statement)
|
99
|
+
|
100
|
+
|
101
|
+
def import_memory(memory: str) -> Any:
    """Return the attribute ``memory`` of ``langchain.memory``."""
    statement = f'from langchain.memory import {memory}'
    return import_module(statement)
|
104
|
+
|
105
|
+
|
106
|
+
def import_prompt(prompt: str) -> Type[PromptTemplate]:
    """Return the attribute ``prompt`` of ``langchain.prompts``.

    The name 'ZeroShotPrompt' is treated as an alias for ``PromptTemplate``.
    """
    if prompt == 'ZeroShotPrompt':
        dotted_path = 'langchain.prompts.PromptTemplate'
    else:
        dotted_path = f'langchain.prompts.{prompt}'
    return import_class(dotted_path)
|
111
|
+
|
112
|
+
|
113
|
+
def import_toolkit(toolkit: str) -> Any:
    """Return the attribute ``toolkit`` of ``langchain.agents.agent_toolkits``."""
    statement = f'from langchain.agents.agent_toolkits import {toolkit}'
    return import_module(statement)
|
116
|
+
|
117
|
+
|
118
|
+
def import_agent(agent: str) -> Agent:
    """Return the agent named ``agent``.

    Names listed in ``bisheng_langchain.agents.__all__`` are taken from that
    package; all others are taken from ``langchain.agents``.
    """
    from bisheng_langchain import agents as custom_agents

    is_custom = agent in custom_agents.__all__
    dotted_path = f'bisheng_langchain.agents.{agent}' if is_custom else f'langchain.agents.{agent}'
    return import_class(dotted_path)
|
125
|
+
|
126
|
+
|
127
|
+
def import_llm(llm: str) -> BaseLanguageModel:
    """Return the attribute ``llm`` of ``langchain.llms``."""
    dotted_path = f'langchain.llms.{llm}'
    return import_class(dotted_path)
|
130
|
+
|
131
|
+
|
132
|
+
def import_tool(tool: str) -> BaseTool:
    """Return the attribute ``tool`` of ``langchain.tools``."""
    dotted_path = f'langchain.tools.{tool}'
    return import_class(dotted_path)
|
135
|
+
|
136
|
+
|
137
|
+
def import_chain(chain: str) -> Type[Chain]:
    """Return the chain named ``chain``.

    Names listed in ``bisheng_langchain.chains.__all__`` are taken from that
    package; all others are taken from ``langchain.chains``.
    """
    from bisheng_langchain import chains as custom_chains

    is_custom = chain in custom_chains.__all__
    dotted_path = f'bisheng_langchain.chains.{chain}' if is_custom else f'langchain.chains.{chain}'
    return import_class(dotted_path)
|
143
|
+
|
144
|
+
|
145
|
+
def import_embedding(embedding: str) -> Any:
    """Return the embedding named ``embedding``.

    Names listed in ``bisheng_langchain.embeddings.__all__`` are taken from
    that package; all others are taken from ``langchain.embeddings``.
    """
    from bisheng_langchain import embeddings as custom_embeddings

    is_custom = embedding in custom_embeddings.__all__
    dotted_path = (f'bisheng_langchain.embeddings.{embedding}'
                   if is_custom else f'langchain.embeddings.{embedding}')
    return import_class(dotted_path)
|
151
|
+
|
152
|
+
|
153
|
+
def import_vectorstore(vectorstore: str) -> Any:
    """Return the vector store named ``vectorstore``.

    Names listed in ``bisheng_langchain.vectorstores.__all__`` are taken from
    that package; all others are taken from ``langchain.vectorstores``.
    """
    from bisheng_langchain import vectorstores as custom_vectorstores

    is_custom = vectorstore in custom_vectorstores.__all__
    dotted_path = (f'bisheng_langchain.vectorstores.{vectorstore}'
                   if is_custom else f'langchain.vectorstores.{vectorstore}')
    return import_class(dotted_path)
|
159
|
+
|
160
|
+
|
161
|
+
def import_documentloader(documentloader: str) -> Any:
    """Return the document loader named ``documentloader``.

    Names listed in ``bisheng_langchain.document_loaders.__all__`` are taken
    from that package; all others are taken from ``langchain.document_loaders``.
    """
    from bisheng_langchain import document_loaders as custom_loaders

    is_custom = documentloader in custom_loaders.__all__
    dotted_path = (f'bisheng_langchain.document_loaders.{documentloader}'
                   if is_custom else f'langchain.document_loaders.{documentloader}')
    return import_class(dotted_path)
|
167
|
+
|
168
|
+
|
169
|
+
def import_textsplitter(textsplitter: str) -> Any:
    """Return the text splitter named ``textsplitter``.

    Names found in ``dir(bisheng_langchain.text_splitter)`` are taken from
    that module; all others are taken from ``langchain.text_splitter``.
    """
    from bisheng_langchain import text_splitter as custom_splitters

    is_custom = textsplitter in dir(custom_splitters)
    dotted_path = (f'bisheng_langchain.text_splitter.{textsplitter}'
                   if is_custom else f'langchain.text_splitter.{textsplitter}')
    return import_class(dotted_path)
|
175
|
+
|
176
|
+
|
177
|
+
def import_utility(utility: str) -> Any:
    """Return the utility named ``utility``.

    'SQLDatabase' is taken from ``langchain.sql_database``; every other name
    is taken from ``langchain.utilities``.
    """
    is_sql_database = utility == 'SQLDatabase'
    dotted_path = (f'langchain.sql_database.{utility}'
                   if is_sql_database else f'langchain.utilities.{utility}')
    return import_class(dotted_path)
|
@@ -67,6 +67,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
67
67
|
query: str,
|
68
68
|
*,
|
69
69
|
run_manager: AsyncCallbackManagerForRetrieverRun,
|
70
|
+
**kwagrs: Any,
|
70
71
|
) -> List[Document]:
|
71
72
|
"""
|
72
73
|
Asynchronously get the relevant documents for a given query.
|
@@ -79,7 +80,7 @@ class EnsembleRetriever(BaseRetriever):
|
|
79
80
|
"""
|
80
81
|
|
81
82
|
# Get fused result of the retrievers.
|
82
|
-
fused_documents = await self.arank_fusion(query, run_manager)
|
83
|
+
fused_documents = await self.arank_fusion(query, run_manager, **kwagrs)
|
83
84
|
|
84
85
|
return fused_documents
|
85
86
|
|
@@ -228,7 +228,7 @@ class ElasticKeywordsSearch(VectorStore, ABC):
|
|
228
228
|
# llm or jiaba extract keywords
|
229
229
|
if self.llm_chain:
|
230
230
|
keywords_str = self.llm_chain.run(query)
|
231
|
-
print('
|
231
|
+
print('llm search keywords:', keywords_str)
|
232
232
|
try:
|
233
233
|
keywords = eval(keywords_str)
|
234
234
|
if not isinstance(keywords, list):
|
@@ -238,6 +238,7 @@ class ElasticKeywordsSearch(VectorStore, ABC):
|
|
238
238
|
keywords = jieba.analyse.extract_tags(query, topK=10, withWeight=False)
|
239
239
|
else:
|
240
240
|
keywords = jieba.analyse.extract_tags(query, topK=10, withWeight=False)
|
241
|
+
print('jieba search keywords:', keywords)
|
241
242
|
match_query = {'bool': {must_or_should: []}}
|
242
243
|
for key in keywords:
|
243
244
|
match_query['bool'][must_or_should].append({query_strategy: {'text': key}})
|
@@ -28,7 +28,7 @@ bisheng_langchain/chains/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
28
28
|
bisheng_langchain/chains/router/multi_rule.py,sha256=BiFryj3-7rOxfttD-MyOkKWLCSGB9LVYd2rjOsIfQC8,375
|
29
29
|
bisheng_langchain/chains/router/rule_router.py,sha256=R2YRUnwn7s_7DbsSn27uPn4cIV0D-5iXEORXir0tNGM,1835
|
30
30
|
bisheng_langchain/chat_models/__init__.py,sha256=4-HTLE_SXO4hmNJu6yQxiQKBt2IFca_ezllVBLmvbEE,635
|
31
|
-
bisheng_langchain/chat_models/host_llm.py,sha256=
|
31
|
+
bisheng_langchain/chat_models/host_llm.py,sha256=35_jTdUm85mk-t2MARZYGC8dIPVtf5XXlGfFE6hQ1Gc,23153
|
32
32
|
bisheng_langchain/chat_models/minimax.py,sha256=JLs_f6vWD9beZYUtjD4FG28G8tZHrGUAWOwdLIuJomw,13901
|
33
33
|
bisheng_langchain/chat_models/proxy_llm.py,sha256=wzVBZik9WC3-f7kyQ1eu3Ooibqpcocln08knf5lV1Nw,17082
|
34
34
|
bisheng_langchain/chat_models/qwen.py,sha256=W73KxDRQBUZEzttEM4K7ZzPqbN-82O6YQmpX-HB_wZU,19971
|
@@ -49,7 +49,7 @@ bisheng_langchain/document_loaders/custom_kv.py,sha256=xWUPhcr1hjbdya4zgEHG4Fl0s
|
|
49
49
|
bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
50
|
bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
51
|
bisheng_langchain/document_loaders/elem_pdf.py,sha256=K-TXILGNFLFjavhun_MFbUF4t2_WGA3Z-kbnr75lmW8,22243
|
52
|
-
bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=
|
52
|
+
bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=Dcu70Wz4vdUpq9y2vWPWroRdq6JrlOJNERoFTnZO0oU,5336
|
53
53
|
bisheng_langchain/document_loaders/universal_kv.py,sha256=dJF_GQGKBMUjB_kX9CSp7xZRhXgwVuGPbMIzJwPh-C0,4063
|
54
54
|
bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
|
55
55
|
bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=B4Dea8xXXnGvB9j2OXv53HILNUmnWeNJz9ssNM-2fLM,1760
|
@@ -65,33 +65,35 @@ bisheng_langchain/embeddings/interface/__init__.py,sha256=GNY3tibpRxpAdAfSvQmXBK
|
|
65
65
|
bisheng_langchain/embeddings/interface/types.py,sha256=VdurbtsnjCPdlOjPFcK2Mg6r9bJYYHb3tepvkk-y3nM,461
|
66
66
|
bisheng_langchain/embeddings/interface/wenxin.py,sha256=5d9gI4enmfkD80s0FHKiDt33O0mwM8Xc5WTubnMUy8c,3104
|
67
67
|
bisheng_langchain/gpts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
68
|
-
bisheng_langchain/gpts/assistant.py,sha256=
|
69
|
-
bisheng_langchain/gpts/auto_optimization.py,sha256=
|
70
|
-
bisheng_langchain/gpts/auto_tool_selected.py,sha256=
|
71
|
-
bisheng_langchain/gpts/load_tools.py,sha256=
|
68
|
+
bisheng_langchain/gpts/assistant.py,sha256=OEB9HA-FpNKGOviVoafld4MHDIEQS5u9AU0QlMuJ-po,5344
|
69
|
+
bisheng_langchain/gpts/auto_optimization.py,sha256=WNsC19rgvuDYQlSIaYThq5RqCbuobDbzCwAJW4Ksw0c,3626
|
70
|
+
bisheng_langchain/gpts/auto_tool_selected.py,sha256=21WETf9o0YS-QEBwv3mmZRObKWszefQkXEqAA6KzoaM,1582
|
71
|
+
bisheng_langchain/gpts/load_tools.py,sha256=LiiK1OqFu7Ki-F_Rhfi1rgp0wBQCSrTDdqsgwciTOIU,8099
|
72
72
|
bisheng_langchain/gpts/message_types.py,sha256=7EJOx62j9E1U67jxWgxE_I7a8IjAvvKANknXkD2gFm0,213
|
73
73
|
bisheng_langchain/gpts/utils.py,sha256=t3YDxaJ0OYd6EKsek7PJFRYnsezwzEFK5oVU-PRbu5g,6671
|
74
74
|
bisheng_langchain/gpts/agent_types/__init__.py,sha256=bg0zlTYGfNXoSBqcICHlzNpVQbejMYeyji_dzvP5qQ0,261
|
75
|
-
bisheng_langchain/gpts/agent_types/llm_functions_agent.py,sha256=
|
76
|
-
bisheng_langchain/gpts/prompts/__init__.py,sha256=
|
75
|
+
bisheng_langchain/gpts/agent_types/llm_functions_agent.py,sha256=IXg5u8dSk-FcLvjrvvLcN5revGccXylXkD73ZWhaDWs,8715
|
76
|
+
bisheng_langchain/gpts/prompts/__init__.py,sha256=pOnXvk6_PjqAoLrh68sI9o3o6znKGxoLMVFP-0XTCJo,704
|
77
|
+
bisheng_langchain/gpts/prompts/assistant_prompt_base.py,sha256=Q69qWxtwaeGPPwMWXQ44OsQPg7wHB7nYT17wbqQe3kM,57
|
78
|
+
bisheng_langchain/gpts/prompts/assistant_prompt_cohere.py,sha256=GLQ77oXqSlE7Xes2ObsFsNon5nOJOCRhQOKE5bUpgaI,2421
|
77
79
|
bisheng_langchain/gpts/prompts/assistant_prompt_opt.py,sha256=TZsRK4XPMrUhGg0PoMyiE3wE-aG34UmlVflkCl_c0QI,4151
|
78
|
-
bisheng_langchain/gpts/prompts/base_prompt.py,sha256=v2eO0c6RF8e6MtGdleHs5B4YTkikg6IZUuBvL2zvyOI,55
|
79
80
|
bisheng_langchain/gpts/prompts/breif_description_prompt.py,sha256=w4A5et0jB-GkxEMQBp4i6GKX3RkVeu7NzWEjOZZAicM,5336
|
80
|
-
bisheng_langchain/gpts/prompts/opening_dialog_prompt.py,sha256=
|
81
|
+
bisheng_langchain/gpts/prompts/opening_dialog_prompt.py,sha256=VVF0JLHtetupVB0kabiFHWDHlQaa4nFLcbYXgIBA3nw,5965
|
81
82
|
bisheng_langchain/gpts/prompts/select_tools_prompt.py,sha256=AyvVnrLEsQy7RHuGTPkcrMUxgA98Q0TzF-xweoc7GyY,1400
|
82
83
|
bisheng_langchain/gpts/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
83
|
-
bisheng_langchain/gpts/tools/api_tools/__init__.py,sha256=
|
84
|
-
bisheng_langchain/gpts/tools/api_tools/base.py,sha256=
|
85
|
-
bisheng_langchain/gpts/tools/api_tools/flow.py,sha256=
|
86
|
-
bisheng_langchain/gpts/tools/api_tools/macro_data.py,sha256=
|
87
|
-
bisheng_langchain/gpts/tools/api_tools/
|
84
|
+
bisheng_langchain/gpts/tools/api_tools/__init__.py,sha256=CkEjgIFM4GIv86V1B7SsFLaB6M86c54QuO8wIRizUZ8,1608
|
85
|
+
bisheng_langchain/gpts/tools/api_tools/base.py,sha256=fWQSDIOVb4JZrtJ9ML9q2ycsAa-_61gXTD0MT19J1LM,3618
|
86
|
+
bisheng_langchain/gpts/tools/api_tools/flow.py,sha256=ot2YAYgQGWgUpb2nCECAmpqHY6m0SgzwkupF9kDT3lU,2461
|
87
|
+
bisheng_langchain/gpts/tools/api_tools/macro_data.py,sha256=FyG-qtl2ECS1CDKt6olN0eDTDM91d-UvDkMDBiVLgYQ,27429
|
88
|
+
bisheng_langchain/gpts/tools/api_tools/openapi.py,sha256=3MlIpzR_NhZogaA-zbH6fnT_KUMm10NnbtDYcxKedS8,3907
|
89
|
+
bisheng_langchain/gpts/tools/api_tools/sina.py,sha256=GGA4ZYvNEpqBZ_l8MUYqgkI8xZe9XcGa9-KlHZVqr6I,9542
|
88
90
|
bisheng_langchain/gpts/tools/api_tools/tianyancha.py,sha256=abDAz-yAH1-2rKiSmZ6TgnrNUnpgAZpDY8oDiWfWapc,6684
|
89
91
|
bisheng_langchain/gpts/tools/bing_search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
90
92
|
bisheng_langchain/gpts/tools/bing_search/tool.py,sha256=v_VlqcMplITA5go5qWA4qZ5p43E1-1s0bzmyY7H0hqY,1710
|
91
93
|
bisheng_langchain/gpts/tools/calculator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
92
94
|
bisheng_langchain/gpts/tools/calculator/tool.py,sha256=iwGPE7jvxZg_jUL2Aq9HHwnRJrF9-ongwrsBX6uk1U0,705
|
93
95
|
bisheng_langchain/gpts/tools/code_interpreter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
94
|
-
bisheng_langchain/gpts/tools/code_interpreter/tool.py,sha256=
|
96
|
+
bisheng_langchain/gpts/tools/code_interpreter/tool.py,sha256=1VLkgngRR0k8YjA4eYkfPd1E7fD29tMKpqtCtn7WwYE,11443
|
95
97
|
bisheng_langchain/gpts/tools/dalle_image_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
96
98
|
bisheng_langchain/gpts/tools/dalle_image_generator/tool.py,sha256=mhxdNNhBESjbOy30Rnp6hQhnrV4evQpv-B1fFXcU-68,7528
|
97
99
|
bisheng_langchain/gpts/tools/get_current_time/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -99,16 +101,49 @@ bisheng_langchain/gpts/tools/get_current_time/tool.py,sha256=3uvk7Yu07qhZy1sBrFM
|
|
99
101
|
bisheng_langchain/input_output/__init__.py,sha256=sW_GB7MlrHYsqY1Meb_LeimQqNsMz1gH-00Tqb2BUyM,153
|
100
102
|
bisheng_langchain/input_output/input.py,sha256=I5YDmgbvvj1o2lO9wi8LE37wM0wP5jkhUREU32YrZMQ,1094
|
101
103
|
bisheng_langchain/input_output/output.py,sha256=6U-az6-Cwz665C2YmcH3SYctWVjPFjmW8s70CA_qphk,11585
|
104
|
+
bisheng_langchain/rag/__init__.py,sha256=gGa3hx0HjA_11FxbnR3P0C0y_OwcZM-sYTHdAajmylk,102
|
105
|
+
bisheng_langchain/rag/bisheng_rag_pipeline.py,sha256=neoBK3TtuQ07_WeuJCzYlvtsDQNepUa_68NT8VCgytw,13749
|
106
|
+
bisheng_langchain/rag/bisheng_rag_pipeline_v2.py,sha256=FWmhOSLAKWqqhdj55Y6WoFJ5GYwK8fJdN3o-mqPmabI,15898
|
107
|
+
bisheng_langchain/rag/bisheng_rag_pipeline_v2_cohere_raw_prompting.py,sha256=Gql3IXaSTtW8WaC7wLLid0TURXoF0hRqAgl3GAYBBCo,16816
|
108
|
+
bisheng_langchain/rag/bisheng_rag_tool.py,sha256=X7mhqYHapLlqfpu90BaW-C3aDeW_PENIci9lNyQv-tI,11971
|
109
|
+
bisheng_langchain/rag/extract_info.py,sha256=emrdvzU9EfdTQU_IBF4RkwaTXmYLk1MOJgdfYhw6VtA,1753
|
110
|
+
bisheng_langchain/rag/run_qa_gen_web.py,sha256=-fIvHNnD3lD0iNU5m0Me1GDwRjlcsB8tE5RnPtFRG2s,1840
|
111
|
+
bisheng_langchain/rag/run_rag_evaluate_web.py,sha256=a9vMhq-ZhEiHHr43uKUzKtjdk280uAP_UHQW_eOaQMw,2224
|
112
|
+
bisheng_langchain/rag/utils.py,sha256=ZR3BwjjLX4XT49aRDv-pf8Z4Mqcbmt2_qbDSwXfDfso,6361
|
113
|
+
bisheng_langchain/rag/config/baseline.yaml,sha256=cFkfVpuNtmexYkQxXbqbcqQ4eDhx81kmIXf8T_rc_Oo,2266
|
114
|
+
bisheng_langchain/rag/config/baseline_caibao.yaml,sha256=uU2HSlPXB8uLq9Fh03wENs06STdMA9CS6ry3X-Wkn-E,2174
|
115
|
+
bisheng_langchain/rag/config/baseline_caibao_knowledge_v2.yaml,sha256=KrkMmHu-myQ8u4FlydXLwsppD4ucQGAYYtGGA0HCJzE,2812
|
116
|
+
bisheng_langchain/rag/config/baseline_caibao_v2.yaml,sha256=WY9wek70eeBhhyiNmkfyrffxhTfBntqPyLMRmoqM_vE,2846
|
117
|
+
bisheng_langchain/rag/config/baseline_demo_v2.yaml,sha256=hCa7bZMCcOWpu6bsAFX4DU1b7MV4eN2phyzvZzFZKkU,2382
|
118
|
+
bisheng_langchain/rag/config/baseline_s2b_mix.yaml,sha256=rkPfzU2-mvjRrZ0zMHaQsncPhq8DrdvVFsw4Sg_jeKc,2398
|
119
|
+
bisheng_langchain/rag/config/baseline_v2.yaml,sha256=yUMlK9CbeIqIrb4iJj14BaqH7JJ15QdUIf1EHd7T_R8,2350
|
120
|
+
bisheng_langchain/rag/init_retrievers/__init__.py,sha256=qpLLAuqZPtumTlJj17Ie5AbDDmiUiDxYefg_pumqu-c,218
|
121
|
+
bisheng_langchain/rag/init_retrievers/baseline_vector_retriever.py,sha256=oRKZZpxlLQAtsubIcAXeXpf1a9h6Pt6uOtNTLeD2jps,2362
|
122
|
+
bisheng_langchain/rag/init_retrievers/keyword_retriever.py,sha256=NRT0fBx6HFR7j9IbRl_NBuqF7hnL-9v5GCqHpgnrfPQ,2523
|
123
|
+
bisheng_langchain/rag/init_retrievers/mix_retriever.py,sha256=Whxq4kjNPLsxnHcVo60usdFFwLTCD-1jO38q08LXkVQ,4653
|
124
|
+
bisheng_langchain/rag/init_retrievers/smaller_chunks_retriever.py,sha256=RQ7QLEOOhBrkw-EimXVJqIGa96D-KkNDik2h9hzg9fU,3805
|
125
|
+
bisheng_langchain/rag/prompts/__init__.py,sha256=IUCq9gzqGQN_6IDk0D_F5t3mOUI_KbmSzYnnXoX4VKE,223
|
126
|
+
bisheng_langchain/rag/prompts/extract_key_prompt.py,sha256=THdcwolRzaKkddgEhabUDytn-caHs9UwECPkPxijWAs,1456
|
127
|
+
bisheng_langchain/rag/prompts/prompt.py,sha256=HPQ4mnMXDL9YsgBqK4S1M2sE6icq2uDL17KKgvEkovI,1925
|
128
|
+
bisheng_langchain/rag/prompts/prompt_cohere.py,sha256=FhW-RCIVnIfKtEqV_-7aQav1riIJmA0jV2hGxCSoysk,8151
|
129
|
+
bisheng_langchain/rag/qa_corpus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
130
|
+
bisheng_langchain/rag/qa_corpus/qa_generator.py,sha256=ZdipOLBd16XfKXygq1JeUx4iv3tfUsprk1HNuC5hcIk,6092
|
131
|
+
bisheng_langchain/rag/rerank/__init__.py,sha256=lcMRa_o5HGNnN9Hn6zHCimYq02OuD9pRAg6eFPZNgR8,72
|
132
|
+
bisheng_langchain/rag/rerank/rerank.py,sha256=RoEwFFb4t4l0aZQmE2-HHHfWIzNLKe9NuFDIoYoWz5g,1836
|
133
|
+
bisheng_langchain/rag/rerank/rerank_benchmark.py,sha256=OU9bh7dQ0-faw4hpz6BPEtmbQEhISTekvRiWpksOfCc,4490
|
134
|
+
bisheng_langchain/rag/scoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
135
|
+
bisheng_langchain/rag/scoring/llama_index_score.py,sha256=Xh8YFT14JGxE3OL2x4JSRa8nGlOK5DlWlsrzbmA2gu8,3001
|
136
|
+
bisheng_langchain/rag/scoring/ragas_score.py,sha256=ClVoRg1O7s5OMcsB2E5UbVHj_ErQ3WKr9hao8BcK6NA,7110
|
102
137
|
bisheng_langchain/retrievers/__init__.py,sha256=XqBeNyPyNCJf-SzNBiFlkxtjrtHUFTTi5pe2yPyOKrA,210
|
103
|
-
bisheng_langchain/retrievers/ensemble.py,sha256=
|
138
|
+
bisheng_langchain/retrievers/ensemble.py,sha256=umjBaZYBEdhJ2F7GlzQgXVLYjSfpybTptiJJbUgryZE,5975
|
104
139
|
bisheng_langchain/retrievers/mix_es_vector.py,sha256=dSrrsuMPSgGiu181EOzACyIKiDXR0qNBQz_914USD3E,4465
|
105
140
|
bisheng_langchain/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
106
141
|
bisheng_langchain/utils/requests.py,sha256=vWGKyNTxApVeaVdKxqACfIT1Q8wMy-jC3kUv2Ce9Mzc,8688
|
107
142
|
bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHnqrsjx700Fy11M,213
|
108
|
-
bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=
|
143
|
+
bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=Pm1rS50GJ0HWbjBsFDgs28SVuVbjGSRPOor6yJlnE7w,13347
|
109
144
|
bisheng_langchain/vectorstores/milvus.py,sha256=lrnezKnYXhyH5M1g3a-Mcwpj9mwzAj44TKmzyUXlQYY,36297
|
110
145
|
bisheng_langchain/vectorstores/retriever.py,sha256=hj4nAAl352EV_ANnU2OHJn7omCH3nBK82ydo14KqMH4,4353
|
111
|
-
bisheng_langchain-0.3.
|
112
|
-
bisheng_langchain-0.3.
|
113
|
-
bisheng_langchain-0.3.
|
114
|
-
bisheng_langchain-0.3.
|
146
|
+
bisheng_langchain-0.3.1.dist-info/METADATA,sha256=uorWxDbAnQkGAmbh0c-ZmAnTf5MgzyhcY8hLKNDIp-Q,2411
|
147
|
+
bisheng_langchain-0.3.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
148
|
+
bisheng_langchain-0.3.1.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
|
149
|
+
bisheng_langchain-0.3.1.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
|
File without changes
|
File without changes
|