bisheng-langchain 1.2.0.dev1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bisheng_langchain/chat_models/__init__.py +2 -3
- bisheng_langchain/document_loaders/elem_unstrcutured_loader.py +12 -0
- bisheng_langchain/gpts/agent_types/llm_functions_agent.py +3 -3
- bisheng_langchain/gpts/tools/api_tools/base.py +2 -2
- bisheng_langchain/gpts/tools/code_interpreter/tool.py +11 -10
- bisheng_langchain/gpts/tools/message/email.py +4 -4
- bisheng_langchain/gpts/tools/sql_agent/tool.py +41 -198
- bisheng_langchain/rag/bisheng_rag_tool.py +1 -1
- bisheng_langchain/rag/extract_info.py +22 -12
- bisheng_langchain/text_splitter.py +2 -2
- {bisheng_langchain-1.2.0.dev1.dist-info → bisheng_langchain-1.3.0.dist-info}/METADATA +13 -13
- {bisheng_langchain-1.2.0.dev1.dist-info → bisheng_langchain-1.3.0.dist-info}/RECORD +14 -14
- {bisheng_langchain-1.2.0.dev1.dist-info → bisheng_langchain-1.3.0.dist-info}/WHEEL +1 -1
- {bisheng_langchain-1.2.0.dev1.dist-info → bisheng_langchain-1.3.0.dist-info}/top_level.txt +0 -0
bisheng_langchain/chat_models/__init__.py

@@ -4,11 +4,10 @@ from .proxy_llm import ProxyChatLLM
 from .qwen import ChatQWen
 from .wenxin import ChatWenxin
 from .xunfeiai import ChatXunfeiAI
-from .
-from .sensetime import SenseChat
+from .sensetime import SenseChat

 __all__ = [
-    'ProxyChatLLM', 'ChatMinimaxAI', 'ChatWenxin', '
+    'ProxyChatLLM', 'ChatMinimaxAI', 'ChatWenxin', 'ChatXunfeiAI', 'HostChatGLM',
     'HostBaichuanChat', 'HostLlama2Chat', 'HostQwenChat', 'CustomLLMChat', 'ChatQWen', 'SenseChat',
     'HostYuanChat', 'HostYiChat', 'HostQwen1_5Chat'
 ]
bisheng_langchain/document_loaders/elem_unstrcutured_loader.py

@@ -64,6 +64,10 @@ class ElemUnstructuredLoader(BasePDFLoader):
                  file_path: str,
                  unstructured_api_key: str = None,
                  unstructured_api_url: str = None,
+                 force_ocr: bool = False,
+                 enable_formular: bool = True,
+                 filter_page_header_footer: bool = False,
+                 ocr_sdk_url: str = None,
                  start: int = 0,
                  n: int = None,
                  verbose: bool = False,

@@ -71,6 +75,10 @@ class ElemUnstructuredLoader(BasePDFLoader):
         """Initialize with a file path."""
         self.unstructured_api_url = unstructured_api_url
         self.unstructured_api_key = unstructured_api_key
+        self.force_ocr = force_ocr
+        self.enable_formular = enable_formular
+        self.filter_page_header_footer = filter_page_header_footer
+        self.ocr_sdk_url = ocr_sdk_url,
         self.headers = {'Content-Type': 'application/json'}
         self.file_name = file_name
         self.start = start

@@ -84,9 +92,13 @@ class ElemUnstructuredLoader(BasePDFLoader):
         b64_data = base64.b64encode(open(self.file_path, 'rb').read()).decode()
         parameters = {'start': self.start, 'n': self.n}
         parameters.update(self.extra_kwargs)
+        # TODO: add filter_page_header_footer into payload when elt4llm is ready.
         payload = dict(filename=os.path.basename(self.file_name),
                        b64_data=[b64_data],
                        mode='partition',
+                       force_ocr=self.force_ocr,
+                       enable_formula=self.enable_formular,
+                       ocr_sdk_url=self.ocr_sdk_url,
                        parameters=parameters)

         resp = requests.post(self.unstructured_api_url, headers=self.headers, json=payload)
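For reference, the new flags are ordinary constructor arguments that end up in the partition payload. A minimal usage sketch with placeholder paths and endpoint; keyword-only usage is assumed because the full constructor signature is not shown in this hunk:

```python
from bisheng_langchain.document_loaders.elem_unstrcutured_loader import ElemUnstructuredLoader

loader = ElemUnstructuredLoader(
    file_name='report.pdf',                      # original file name kept on the loader
    file_path='/tmp/report.pdf',
    unstructured_api_url='http://etl-host/v1/etl4llm/predict',  # placeholder endpoint
    force_ocr=False,                             # new in 1.3.0: force OCR even for text PDFs
    enable_formular=True,                        # new in 1.3.0: sent to the service as `enable_formula`
    filter_page_header_footer=False,             # accepted but not yet added to the payload (see TODO)
)
docs = loader.load()
```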
bisheng_langchain/gpts/agent_types/llm_functions_agent.py

@@ -55,11 +55,11 @@ def get_openai_functions_agent_executor(tools: list[BaseTool], llm: LanguageMode

     # Define the function to execute tools
     async def acall_tool(messages):
-        tool_messages = await tool_nodes.
+        tool_messages = await tool_nodes.ainvoke(messages, None, store=None)
         return tool_messages

     def call_tool(messages):
-        tool_messages = tool_nodes.
+        tool_messages = tool_nodes.invoke(messages, config=None, store=None)
         return tool_messages

     workflow = MessageGraph()

@@ -147,7 +147,7 @@ def get_qwen_local_functions_agent_executor(

     # Define the function to execute tools
     async def call_tool(messages):
-        tool_messages = await tool_nodes.
+        tool_messages = await tool_nodes.ainvoke(messages, config=None, store=None)
         return tool_messages

     workflow = MessageGraph()
bisheng_langchain/gpts/tools/api_tools/base.py

@@ -1,4 +1,4 @@
-from typing import Any, Dict, Tuple, Type, Union
+from typing import Any, Dict, Tuple, Type, Union, Optional

 from pydantic import ConfigDict, model_validator, BaseModel, Field

@@ -13,7 +13,7 @@ class ApiArg(BaseModel):

 class MultArgsSchemaTool(Tool):

-    def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
+    def _to_args_and_kwargs(self, tool_input: Union[str, Dict], tool_call_id: Optional[str]) -> Tuple[Tuple, Dict]:
         # For backwards compatibility, if run_input is a string,
         # pass as a positional argument.
         if isinstance(tool_input, str):
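The extra `tool_call_id` parameter (here and, identically, in `rag/bisheng_rag_tool.py` further down) presumably keeps the override compatible with the `Tool._to_args_and_kwargs` hook of langchain-core 0.3, which the metadata now pins. A sketch of the resulting pattern; the dict branch is an assumption, since the body is truncated in this diff:

```python
from typing import Dict, Optional, Tuple, Union

from langchain_core.tools import Tool


class MultArgsSchemaTool(Tool):
    def _to_args_and_kwargs(self, tool_input: Union[str, Dict],
                            tool_call_id: Optional[str]) -> Tuple[Tuple, Dict]:
        # Backwards compatibility: a plain string is passed as a single positional argument.
        if isinstance(tool_input, str):
            return (tool_input,), {}
        # Assumed: a dict input is spread as keyword arguments.
        return (), tool_input
```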
bisheng_langchain/gpts/tools/code_interpreter/tool.py

@@ -183,19 +183,20 @@ def upload_minio(
     import minio

     minio_client = minio.Minio(
-        endpoint=param.get('
-        access_key=param.get('
-        secret_key=param.get('
-        secure=param.get('
-        cert_check=param.get('
+        endpoint=param.get('endpoint'),
+        access_key=param.get('access_key'),
+        secret_key=param.get('secret_key'),
+        secure=param.get('schema'),
+        cert_check=param.get('cert_check'),
     )
     minio_share = minio.Minio(
-        endpoint=param.get('
-        access_key=param.get('
-        secret_key=param.get('
-        secure=param.get('
-        cert_check=param.get('
+        endpoint=param.get('sharepoint'),
+        access_key=param.get('access_key'),
+        secret_key=param.get('secret_key'),
+        secure=param.get('schema'),
+        cert_check=param.get('cert_check'),
     )
+    bucket = param.get('tmp_bucket', 'tmp-dir')
     logger.debug(
         'upload_file obj={} bucket={} file_paht={}',
         object_name,
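The replacement lines spell out which keys `upload_minio` now reads from `param`. A sketch of the expected shape of that dict, with placeholder values; only the key names come from the diff above:

```python
# Placeholder values. 'schema' is passed as the `secure` flag, 'sharepoint' is the endpoint
# used for the share/link client, and 'tmp_bucket' falls back to 'tmp-dir'.
minio_param = {
    'endpoint': 'minio:9000',
    'sharepoint': 'files.example.com:9000',
    'access_key': 'minioadmin',
    'secret_key': 'minioadmin',
    'schema': False,
    'cert_check': False,
    'tmp_bucket': 'tmp-dir',
}
```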
bisheng_langchain/gpts/tools/message/email.py

@@ -15,7 +15,7 @@ class InputArgs(BaseModel):
     content: str = Field(description="邮件正文内容")


-class EmailMessageTool(
+class EmailMessageTool(BaseModel):

     email_account: str = Field(description="发件人邮箱")
     email_password: str = Field(description="邮箱授权码/密码")

@@ -25,9 +25,9 @@ class EmailMessageTool(APIToolBase):

     def send_email(
         self,
-        receiver,
-        subject,
-        content,
+        receiver: str = None,
+        subject: str = None,
+        content: str = None,
     ):
         """
         发送电子邮件函数
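For orientation, the retyped parameters are plain keyword arguments. A minimal call sketch; the `tool` instance and its remaining account fields are assumed, since only `email_account` and `email_password` are visible in this hunk:

```python
# `tool` is an already-configured EmailMessageTool instance; sender account, password and
# server settings come from the model fields, most of which are outside this hunk.
result = tool.send_email(
    receiver='someone@example.com',
    subject='Weekly report',
    content='Please find the report attached.',
)
```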
bisheng_langchain/gpts/tools/sql_agent/tool.py

@@ -1,234 +1,77 @@
-from typing import Type, Optional
+from typing import Type, Optional

 from langchain_community.agent_toolkits import SQLDatabaseToolkit
 from langchain_community.utilities import SQLDatabase
 from langchain_core.callbacks import CallbackManagerForToolRun
 from langchain_core.language_models import BaseLanguageModel
-from langchain_core.messages import
-from langchain_core.
-from
-from
-from
-from langgraph.graph import add_messages, StateGraph
-from langgraph.prebuilt import ToolNode
-from pydantic import ConfigDict, BaseModel, Field
+from langchain_core.messages import HumanMessage
+from langchain_core.tools import BaseTool
+from langgraph.graph.graph import CompiledGraph
+from langgraph.prebuilt import create_react_agent
+from pydantic import BaseModel, Field, ConfigDict

+_agent_system_prompt = """You are an autonomous agent that answers user questions by querying an SQL database through the provided tools.

-
-    messages: Annotated[list[AnyMessage], add_messages]
+When a new question arrives, follow the steps *in order*:

+1. ALWAYS call `sql_db_list_tables` first.
+   Purpose: discover what tables are available. Never skip this step.

-
-
-    tool_calls = state["messages"][-1].tool_calls
-    return {
-        "messages": [
-            ToolMessage(
-                content=f"Error: {repr(error)}\n please fix your mistakes.",
-                tool_call_id=tc["id"],
-            )
-            for tc in tool_calls
-        ]
-    }
+2. Choose the table(s) that are probably relevant, then call `sql_db_schema`
+   once for each of those tables to obtain their schemas.

+3. Write one syntactically-correct {dialect} SELECT statement.
+   Guidelines for this query:
+   - Return no more than 50 rows **unless** the user explicitly requests another limit.
+   - Select only the columns needed to answer the question; avoid `SELECT *`.
+   - If helpful, add `ORDER BY` on a meaningful column so the most interesting rows appear first.
+   - ABSOLUTELY NO data-modification statements (INSERT, UPDATE, DELETE, DROP, …).
+   - Double-check the SQL before executing.

-
-
-
-
-    return ToolNode(tools).with_fallbacks(
-        [RunnableLambda(handle_tool_error)], exception_key="error"
-    )
-
+4. Execute the query with the execution tool `sql_db_query`.
+   If execution fails, inspect the error, revise the SQL, and try again.
+   Repeat until the query runs successfully or you are certain the request
+   cannot be satisfied.

-
-
+5. Read the resulting rows and craft a concise, direct answer for the user.
+   If the result set is empty, explain that no matching data was found.

-
+6. Include the final SQL query in your answer unless the user asks you not to.

-
-
-
-
-
+Remember:
+- List tables → fetch schemas → write & verify SELECT → execute → answer.
+- Never skip steps 1 or 2.
+- Never perform DML.
+- Keep answers focused on the user's question."""

-    db: SQLDatabase
-
-    def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None):
-        result = self.db.run_no_throw(query)
-        if not result:
-            return "Error: Query failed. Please rewrite your query and try again."
-        return result

 class SqlAgentAPIWrapper(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
     llm: BaseLanguageModel = Field(description="llm to use for sql agent")
     sql_address: str = Field(description="sql database address for SQLDatabase uri")

     db: Optional[SQLDatabase] = None
-
-    get_schema_tool: Optional[BaseTool] = None
-    db_query_tool: Optional[BaseTool] = None
-    query_check: Optional[Any] = None
-    query_gen: Optional[Any] = None
-    workflow: Optional[StateGraph] = None
-    app: Optional[Any] = None
-    schema_llm: Optional[Any] = None
-    query_check_llm: Optional[Any] = None
-    query_gen_llm: Optional[Any] = None
-    model_config = ConfigDict(arbitrary_types_allowed=True)
+    agent: Optional[CompiledGraph] = None

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.llm = kwargs.get('llm')
-
-        # todo 修改sql agent实现逻辑。此处逻辑只支持bishengLLM组件。原因是因为目前的实现必须实例化多个llm对象,每个llm对象绑定不同的tool
-        self.schema_llm = self.llm.__class__(model_id=self.llm.model_id, model_name=self.llm.model_name)
-        self.query_check_llm = self.llm.__class__(model_id=self.llm.model_id, model_name=self.llm.model_name)
-        self.query_gen_llm = self.llm.__class__(model_id=self.llm.model_id, model_name=self.llm.model_name)
         self.sql_address = kwargs.get('sql_address')

         self.db = SQLDatabase.from_uri(self.sql_address)
         toolkit = SQLDatabaseToolkit(db=self.db, llm=self.llm)
         tools = toolkit.get_tools()
-        self.
-
-
-
-
-        self.query_gen = self.init_query_gen()
-
-        # Define a new graph
-        self.workflow = StateGraph(State)
-        self.init_workflow()
-        self.app = self.workflow.compile(checkpointer=False, debug=True)
-
-    def init_workflow(self):
-        self.workflow.add_node("first_tool_call", self.first_tool_call)
-        self.workflow.add_node(
-            "list_tables_tool", create_tool_node_with_fallback([self.list_tables_tool])
-        )
-
-        self.workflow.add_node("get_schema_tool", create_tool_node_with_fallback([self.get_schema_tool]))
-
-        model_get_schema = self.schema_llm.bind_tools(
-            [self.get_schema_tool]
-        )
-        self.workflow.add_node(
-            "model_get_schema",
-            lambda state: {
-                "messages": [model_get_schema.invoke(state["messages"])],
-            },
-        )
-
-        self.workflow.add_node("query_gen", self.query_gen_node)
-        self.workflow.add_node("correct_query", self.model_check_query)
-
-        self.workflow.add_node("execute_query", create_tool_node_with_fallback([self.db_query_tool]))
-
-        self.workflow.add_edge(START, "first_tool_call")
-        self.workflow.add_edge("first_tool_call", "list_tables_tool")
-        self.workflow.add_edge("list_tables_tool", "model_get_schema")
-        self.workflow.add_edge("model_get_schema", "get_schema_tool")
-        self.workflow.add_edge("get_schema_tool", "query_gen")
-        self.workflow.add_conditional_edges(
-            "query_gen",
-            self.should_continue,
-        )
-        self.workflow.add_edge("correct_query", "execute_query")
-        self.workflow.add_edge("execute_query", "query_gen")
-
-    @staticmethod
-    def should_continue(state: State) -> Literal[END, "correct_query", "query_gen"]:
-        messages = state["messages"]
-        last_message = messages[-1]
-        # If there is a tool call, then we finish
-        if getattr(last_message, "tool_calls", None):
-            return END
-        if last_message.content.startswith("Error:"):
-            return "query_gen"
-        else:
-            return "correct_query"
-
-    def init_query_check(self):
-        query_check_system = """You are a SQL expert with a strong attention to detail.
-Double check the SQLite query for common mistakes, including:
-- Using NOT IN with NULL values
-- Using UNION when UNION ALL should have been used
-- Using BETWEEN for exclusive ranges
-- Data type mismatch in predicates
-- Properly quoting identifiers
-- Using the correct number of arguments for functions
-- Casting to the correct data type
-- Using the proper columns for joins
-
-If there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.
-
-You will call the appropriate tool to execute the query after running this check."""
-
-        query_check_prompt = ChatPromptTemplate.from_messages(
-            [("system", query_check_system), ("placeholder", "{messages}")]
+        self.agent = create_react_agent(
+            self.llm,
+            tools,
+            prompt=_agent_system_prompt.format(dialect=self.db.dialect),
+            checkpointer=False,
         )
-        query_check = query_check_prompt | self.query_check_llm.bind_tools(
-            [self.db_query_tool]
-        )
-        return query_check
-
-    def first_tool_call(self, state: State) -> dict[str, list[AIMessage]]:
-        return {
-            "messages": [
-                AIMessage(
-                    content="",
-                    tool_calls=[
-                        {
-                            "name": "sql_db_list_tables",
-                            "args": {},
-                            "id": "tool_abcd123",
-                        }
-                    ],
-                )
-            ]
-        }
-
-    def model_check_query(self, state: State) -> dict[str, list[AIMessage]]:
-        """
-        Use this tool to double-check if your query is correct before executing it.
-        """
-        return {"messages": [self.query_check.invoke({"messages": [state["messages"][-1]]})]}
-
-    def init_query_gen(self):
-        # Add a node for a model to generate a query based on the question and schema
-        query_gen_system = """You are a SQL expert with a strong attention to detail.Given an input question, output a syntactically correct SQL query to run, then look at the results of the query and return the answer.DO NOT call any tool besides SubmitFinalAnswer to submit the final answer.When generating the query:Output the SQL query that answers the input question without a tool call.Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most 10 results.You can order the results by a relevant column to return the most interesting examples in the database.Never query for all the columns from a specific table, only ask for the relevant columns given the question.If you get an error while executing a query, rewrite the query and try again.If you get an empty result set, you should try to rewrite the query to get a non-empty result set. NEVER make stuff up if you don't have enough information to answer the query... just say you don't have enough information.If you have enough information to answer the input question, simply invoke the appropriate tool to submit the final answer to the user.DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database."""
-        query_gen_prompt = ChatPromptTemplate.from_messages(
-            [("system", query_gen_system), ("placeholder", "{messages}")]
-        )
-        query_gen = query_gen_prompt | self.query_gen_llm.bind_tools(
-            [SubmitFinalAnswer]
-        )
-        return query_gen
-
-    def query_gen_node(self, state: State) -> Any:
-        message = self.query_gen.invoke(state)
-
-        # Sometimes, the LLM will hallucinate and call the wrong tool. We need to catch this and return an error message.
-        tool_messages = []
-        if message.tool_calls:
-            for tc in message.tool_calls:
-                if tc["name"] != "SubmitFinalAnswer":
-                    tool_messages.append(
-                        ToolMessage(
-                            content=f"Error: The wrong tool was called: {tc['name']}. Please fix your mistakes. Remember to only call SubmitFinalAnswer to submit the final answer. Generated queries should be outputted WITHOUT a tool call.",
-                            tool_call_id=tc["id"],
-                        )
-                    )
-        else:
-            tool_messages = []
-        return {"messages": [message] + tool_messages}

     def run(self, query: str) -> str:
-        messages = self.
-
-        })
-        return messages["messages"][-1].tool_calls[0]["args"]["final_answer"]
+        messages = self.agent.invoke({"messages": [HumanMessage(content=query)]})
+        return messages["messages"][-1].content

     def arun(self, query: str) -> str:
         return self.run(query)
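The wrapper now delegates everything to a single prebuilt ReAct agent, so using it is construct-and-run. A sketch with a placeholder model and connection URI; any tool-calling chat model should work as `llm`:

```python
from langchain_openai import ChatOpenAI

from bisheng_langchain.gpts.tools.sql_agent.tool import SqlAgentAPIWrapper

llm = ChatOpenAI(model='gpt-4o-mini')      # placeholder model name
agent = SqlAgentAPIWrapper(
    llm=llm,
    sql_address='sqlite:///./chinook.db',  # placeholder SQLAlchemy-style URI
)
print(agent.run('Which 5 customers spent the most in total?'))
```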
bisheng_langchain/rag/bisheng_rag_tool.py

@@ -24,7 +24,7 @@ from loguru import logger

 class MultArgsSchemaTool(Tool):

-    def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
+    def _to_args_and_kwargs(self, tool_input: Union[str, Dict], tool_call_id: Optional[str]) -> Tuple[Tuple, Dict]:
         # For backwards compatibility, if run_input is a string,
         # pass as a positional argument.
         if isinstance(tool_input, str):
bisheng_langchain/rag/extract_info.py

@@ -1,5 +1,5 @@
 from bisheng_langchain.chat_models import ChatQWen
-from langchain.chains import LLMChain
+from langchain.chains.llm import LLMChain
 from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,

@@ -15,22 +15,32 @@ human_template = """
 """

 messages = [
-
-
-
+    SystemMessagePromptTemplate.from_template(system_template),
+    HumanMessagePromptTemplate.from_template(human_template),
+]
 title_extract_prompt = ChatPromptTemplate.from_messages(messages)


-def extract_title(llm, text, max_length=7000) -> str:
-
+def extract_title(llm, text, max_length=7000, abstract_prompt: str = None) -> str:
+    """
+    此方法在bisheng_langchain模型的还有两处调用用,在不能提供abstract_propmpt的情况下
+    使用原来现有提示词.
+    """
+    if abstract_prompt:
+        updated_messages = [
+            SystemMessagePromptTemplate.from_template(abstract_prompt),
+            HumanMessagePromptTemplate.from_template(human_template),
+        ]
+        updated_title_extract_prompt = ChatPromptTemplate.from_messages(updated_messages)
+        chain = LLMChain(llm=llm, prompt=updated_title_extract_prompt)
+    else:
+        chain = LLMChain(llm=llm, prompt=title_extract_prompt)
     ans = chain.run(context=text[:max_length])
-    return ans
+    return ans


-if __name__ ==
-    llm = ChatQWen(model_name=
-                   api_key='',
-                   temperature=0.01)
+if __name__ == "__main__":
+    llm = ChatQWen(model_name="qwen1.5-72b-chat", api_key="", temperature=0.01)
     text = "江苏蔚蓝锂芯股份有限公司\n2021 年年度报告 \n2022 年 03 月\n\n 第一节 重要提示、目录和释义\n公司董事会、监事会及董事、监事、高级管理人员保证年度报告内容的真实、准确、完整,不存在虚假记载、误导性陈述或重大遗漏,并承担个别和连带的法律责任。\n公司负责人 CHEN KAI、主管会计工作负责人林文华及会计机构负责人(会计主管人员)张宗红声明:保证本年度报告中财务报告的真实、准确、完整。\n所有董事均已出席了审议本报告的董事会会议。"
     ans = extract_title(llm, text)
-    print(ans)
+    print(ans)
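`extract_title` keeps its old behaviour when `abstract_prompt` is omitted and only swaps the system message when one is supplied; the human template (and its `{context}` variable) stays the same. A sketch with placeholder credentials, document text, and prompt wording:

```python
from bisheng_langchain.chat_models import ChatQWen
from bisheng_langchain.rag.extract_info import extract_title

llm = ChatQWen(model_name='qwen1.5-72b-chat', api_key='', temperature=0.01)  # placeholder credentials
long_document_text = '江苏蔚蓝锂芯股份有限公司 2021 年年度报告 ...'  # placeholder document text

# Default behaviour: the built-in title-extraction prompt is used.
title = extract_title(llm, long_document_text)

# Custom system prompt: only the system message is replaced.
abstract = extract_title(
    llm,
    long_document_text,
    abstract_prompt='Summarize the document below in one sentence.',
)
```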
bisheng_langchain/text_splitter.py

@@ -106,6 +106,7 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
         self._separator_rule = separator_rule or ['after' for _ in range(4)]
         self.separator_rule = {one: self._separator_rule[index] for index, one in enumerate(separators)}
         self._is_separator_regex = is_separator_regex
+        self._chunk_overlap = kwargs.get('chunk_overlap', 0)

     def split_documents(self, documents: Iterable[Document]) -> List[Document]:
         texts, metadatas = [], []

@@ -167,7 +168,6 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
         documents = []
         for i, text in enumerate(texts):
             index = -1
-            # metadata = copy.deepcopy(_metadatas[i])
             indexes = metadatas[i].get('indexes', [])
             pages = metadatas[i].get('pages', [])
             types = metadatas[i].get('types', [])

@@ -215,7 +215,7 @@ class ElemCharacterTextSplitter(RecursiveCharacterTextSplitter):
             # for elem in box_no_duplicates:
             #     new_metadata['chunk_bboxes'].append(
             #         {'page': elem[0], 'bbox': new_metadata['bboxes'][elem[1]]})
-
             new_doc = Document(page_content=chunk, metadata=new_metadata)
+            prev_document = new_doc
             documents.append(new_doc)
         return documents
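`chunk_overlap` is now read from `**kwargs` (default 0) and kept on the instance. A sketch of passing it through; the constructor arguments beyond what the hunk shows (`separators`, `separator_rule`, `is_separator_regex`) and the forwarding of `chunk_size`/`chunk_overlap` to the parent splitter are assumptions:

```python
from langchain_core.documents import Document

from bisheng_langchain.text_splitter import ElemCharacterTextSplitter

# Assumed keyword usage; chunk_overlap is the value now stored as self._chunk_overlap.
splitter = ElemCharacterTextSplitter(
    separators=['\n\n', '\n', '。', ' '],
    chunk_size=500,
    chunk_overlap=50,
)
docs = [Document(page_content='First paragraph.\n\nSecond paragraph.\n\nThird paragraph.', metadata={})]
chunks = splitter.split_documents(docs)
```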
{bisheng_langchain-1.2.0.dev1.dist-info → bisheng_langchain-1.3.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bisheng-langchain
-Version: 1.2.0.dev1
+Version: 1.3.0
 Summary: bisheng langchain modules
 Home-page: https://github.com/dataelement/bisheng
 Author: DataElem

@@ -15,23 +15,23 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
-Requires-Dist: langchain
+Requires-Dist: langchain==0.3.*
 Requires-Dist: zhipuai
 Requires-Dist: websocket-client
 Requires-Dist: elasticsearch
-Requires-Dist: opencv-python
-Requires-Dist: Pillow
+Requires-Dist: opencv-python==4.5.5.64
+Requires-Dist: Pillow==9.5.0
 Requires-Dist: bisheng-pyautogen
-Requires-Dist: jieba
+Requires-Dist: jieba==0.42.1
 Requires-Dist: pydantic
-Requires-Dist: pymupdf
-Requires-Dist: shapely
-Requires-Dist: filetype
-Requires-Dist: langgraph
-Requires-Dist: openai
-Requires-Dist: langchain-openai
-Requires-Dist: llama-index
-Requires-Dist: bisheng-ragas
+Requires-Dist: pymupdf==1.23.8
+Requires-Dist: shapely==2.0.2
+Requires-Dist: filetype==1.2.0
+Requires-Dist: langgraph==0.3.*
+Requires-Dist: openai==1.*
+Requires-Dist: langchain-openai==0.3.*
+Requires-Dist: llama-index==0.9.48
+Requires-Dist: bisheng-ragas==1.*

 ## What is bisheng-langchain?


{bisheng_langchain-1.2.0.dev1.dist-info → bisheng_langchain-1.3.0.dist-info}/RECORD

@@ -1,5 +1,5 @@
 bisheng_langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bisheng_langchain/text_splitter.py,sha256=
+bisheng_langchain/text_splitter.py,sha256=HJFGo6g_JGxWCAat98VIlKAKfaIYaR6f-g7SJwvhcRM,8840
 bisheng_langchain/agents/__init__.py,sha256=ctsKj77fS8qlkhz_9sS_AhCjFvFNxEpJ9KBYVrApLRg,226
 bisheng_langchain/agents/chatglm_functions_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/agents/chatglm_functions_agent/base.py,sha256=IxPf9_atUKy8UMgIDYrgmWBkrVfOLdyJSDfVcD_rsDg,13724

@@ -32,7 +32,7 @@ bisheng_langchain/chains/retrieval/retrieval_chain.py,sha256=7VLJ-IPVjKfmAVgVET4
 bisheng_langchain/chains/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/chains/router/multi_rule.py,sha256=BiFryj3-7rOxfttD-MyOkKWLCSGB9LVYd2rjOsIfQC8,375
 bisheng_langchain/chains/router/rule_router.py,sha256=R2YRUnwn7s_7DbsSn27uPn4cIV0D-5iXEORXir0tNGM,1835
-bisheng_langchain/chat_models/__init__.py,sha256=
+bisheng_langchain/chat_models/__init__.py,sha256=toMWnhDOM_iEKpQ7PbnuIMDwrcLlythJc7GegodcDCY,586
 bisheng_langchain/chat_models/host_llm.py,sha256=lyC7kll54wYazpkhDQaXkYIISUkdfU70f0Btp7tV0kQ,23536
 bisheng_langchain/chat_models/minimax.py,sha256=V3phQY4qpL6lgwQOdmClvvqc7EPDSSrphjj935xAhOE,13860
 bisheng_langchain/chat_models/proxy_llm.py,sha256=0S6HM3WZXRws6er9YaPVpNYVkVWDJeyI9ie-3FyDtTw,17044

@@ -54,7 +54,7 @@ bisheng_langchain/document_loaders/custom_kv.py,sha256=-7h7QqGUFPhpNYAUZBDmkr_pD
 bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/document_loaders/elem_pdf.py,sha256=WpRIStBl1DUDa0NCd594gKU2NsgURRv5jnKSM71ZPI8,22273
-bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=
+bisheng_langchain/document_loaders/elem_unstrcutured_loader.py,sha256=AIKnApzovohheEJFMolE7J-tGY_ClUfiGBb-SwYuC60,8697
 bisheng_langchain/document_loaders/universal_kv.py,sha256=7z19Z_NwtILmtkbIURf4qMyEJGjlE-5CkhqF2KFGc7I,4134
 bisheng_langchain/document_loaders/parsers/__init__.py,sha256=OOM_FJkwaU-zNS58fASw0TH8FNT6VXKb0VrvisgdrII,171
 bisheng_langchain/document_loaders/parsers/ellm_client.py,sha256=Y_CRYwBr-gFArOirF1b76KyI5N8eVpsLeDiIsKtYkpU,1641

@@ -77,7 +77,7 @@ bisheng_langchain/gpts/load_tools.py,sha256=uQ-jcnkwRdy2l2S61WQWuodlGAbhHjEwJTTB
 bisheng_langchain/gpts/message_types.py,sha256=7EJOx62j9E1U67jxWgxE_I7a8IjAvvKANknXkD2gFm0,213
 bisheng_langchain/gpts/utils.py,sha256=t3YDxaJ0OYd6EKsek7PJFRYnsezwzEFK5oVU-PRbu5g,6671
 bisheng_langchain/gpts/agent_types/__init__.py,sha256=88tFt1GfrfIqa4hCg0cMJk7rTeUmCSSdiVhR41CW4rM,381
-bisheng_langchain/gpts/agent_types/llm_functions_agent.py,sha256=
+bisheng_langchain/gpts/agent_types/llm_functions_agent.py,sha256=RgWSeeWl5JxEzzyF-336rq9NUDjJcF18LquZe4a-PPY,7126
 bisheng_langchain/gpts/agent_types/llm_react_agent.py,sha256=rMddCaNVN0dXLVirvpxhXoqeLBDadf9xK3rnOU0uKL0,6523
 bisheng_langchain/gpts/prompts/__init__.py,sha256=pOnXvk6_PjqAoLrh68sI9o3o6znKGxoLMVFP-0XTCJo,704
 bisheng_langchain/gpts/prompts/assistant_prompt_base.py,sha256=Yp9M1XbZb5jHeBG_txcwWA84Euvl89t0g-GbJMa5Ur0,1133

@@ -89,7 +89,7 @@ bisheng_langchain/gpts/prompts/react_agent_prompt.py,sha256=MA5FReipAYfe6ypOvg_S
 bisheng_langchain/gpts/prompts/select_tools_prompt.py,sha256=AyvVnrLEsQy7RHuGTPkcrMUxgA98Q0TzF-xweoc7GyY,1400
 bisheng_langchain/gpts/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/api_tools/__init__.py,sha256=PovWXou3paSpV9xABCt4HBqcD5saW4gYg1W7eut8oFQ,4810
-bisheng_langchain/gpts/tools/api_tools/base.py,sha256=
+bisheng_langchain/gpts/tools/api_tools/base.py,sha256=jFAJRuFL3ZUzVuZFCgVBcjx_LyaLjQt_r_TH62ASMYc,3595
 bisheng_langchain/gpts/tools/api_tools/firecrawl.py,sha256=iOpRvCVhZLb-0rpiz-QKBSEYYFi6Pr2ztYT8W6efV8A,2648
 bisheng_langchain/gpts/tools/api_tools/flow.py,sha256=-Qk9xlzvnYWcj2dJtOcFkmXgxq4GANdoe7q0HWdj4tA,2443
 bisheng_langchain/gpts/tools/api_tools/jina.py,sha256=OLK8KetHf4G8xozMEC7twaOQeaniON4IvyhdfWxro1k,1420

@@ -104,18 +104,18 @@ bisheng_langchain/gpts/tools/bing_search/tool.py,sha256=FlaeNEiOO52YjxpXu62efaMH
 bisheng_langchain/gpts/tools/calculator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/calculator/tool.py,sha256=5FFL3YAYGQqC2L7zFP3LK6zApZ4GFI9MjUa5VdQ9nvY,695
 bisheng_langchain/gpts/tools/code_interpreter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bisheng_langchain/gpts/tools/code_interpreter/tool.py,sha256=
+bisheng_langchain/gpts/tools/code_interpreter/tool.py,sha256=oUhlJzn5sQzk-GTxyzq_i89Is2IYaO4hoIYcoo9N_e4,11441
 bisheng_langchain/gpts/tools/dalle_image_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/dalle_image_generator/tool.py,sha256=m1c_1pVJ3cM8Qrp0wDXEgEQ_9XJo_CyCmcwRayBsIYE,7492
 bisheng_langchain/gpts/tools/get_current_time/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/get_current_time/tool.py,sha256=Vyt88OdRTRdlF3ppHstzSyG-y_Vai3HSLvVQ_FBmzy8,788
 bisheng_langchain/gpts/tools/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bisheng_langchain/gpts/tools/message/dingding.py,sha256=xvcUkRA-_-l7u6bATPH-78L6o7a1QBRZVz_3vBqybm0,1684
-bisheng_langchain/gpts/tools/message/email.py,sha256=
+bisheng_langchain/gpts/tools/message/email.py,sha256=QG-jIa7xQMnaudqjHWIsT-sm6z1M74Nz2rcXnl_Dtds,3451
 bisheng_langchain/gpts/tools/message/feishu.py,sha256=B1kZVnaEVm5MCeufYXdA12idxu3fCPct2E4hAkh_NVc,4512
 bisheng_langchain/gpts/tools/message/wechat.py,sha256=CxZOqShIxOIqxwXLyNHDsEoddFM9eOpfu80vuLJLOsI,1590
 bisheng_langchain/gpts/tools/sql_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bisheng_langchain/gpts/tools/sql_agent/tool.py,sha256=
+bisheng_langchain/gpts/tools/sql_agent/tool.py,sha256=xWnwaW4bKFmQ-398gea9c7NuZShitKIww2AxJjoz8ic,4461
 bisheng_langchain/input_output/__init__.py,sha256=sW_GB7MlrHYsqY1Meb_LeimQqNsMz1gH-00Tqb2BUyM,153
 bisheng_langchain/input_output/input.py,sha256=hESoU9YuTtFvR6hvI1nXJRg42bZnj_vt3k1phIMG9mY,1077
 bisheng_langchain/input_output/output.py,sha256=LB14UJu7egxN_iSCEBy2KPaZOgUmyS5armI9DNRE2U8,11525

@@ -125,8 +125,8 @@ bisheng_langchain/rag/__init__.py,sha256=Rm_cDxOJINt0H4bOeUo3JctPxaI6xKKXZcS-R_w
 bisheng_langchain/rag/bisheng_rag_chain.py,sha256=75HCIkUAMGrD1E6BaSA9dhnug137PZvDnKUZxihStIE,6149
 bisheng_langchain/rag/bisheng_rag_pipeline.py,sha256=neoBK3TtuQ07_WeuJCzYlvtsDQNepUa_68NT8VCgytw,13749
 bisheng_langchain/rag/bisheng_rag_pipeline_v2.py,sha256=iOoF7mbLp9qDGPsV0fEmgph_Ba8VnECYvCPebXk8xmo,16144
-bisheng_langchain/rag/bisheng_rag_tool.py,sha256=
-bisheng_langchain/rag/extract_info.py,sha256=
+bisheng_langchain/rag/bisheng_rag_tool.py,sha256=JAxsoASwaCaGHrFlAylYOZQZ9ZdnMcfYvAaZZKvIz0g,13676
+bisheng_langchain/rag/extract_info.py,sha256=QEFlAaC_9-87dl4BtTz9ciQaQhfE_XjWUnQrwnUfRiw,2215
 bisheng_langchain/rag/run_qa_gen_web.py,sha256=-fIvHNnD3lD0iNU5m0Me1GDwRjlcsB8tE5RnPtFRG2s,1840
 bisheng_langchain/rag/run_rag_evaluate_web.py,sha256=a9vMhq-ZhEiHHr43uKUzKtjdk280uAP_UHQW_eOaQMw,2224
 bisheng_langchain/rag/utils.py,sha256=ecl4sDR8iUrVCBRPAAT0hZOHkH50-TLS3567GLP1sRM,7122

@@ -166,7 +166,7 @@ bisheng_langchain/vectorstores/__init__.py,sha256=zCZgDe7LyQ0iDkfcm5UJ5NxwKQSRHn
 bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=BxvT9FUTju4AZPtQFTbYLmIIKKw8bqcact5Cav_5H2I,15357
 bisheng_langchain/vectorstores/milvus.py,sha256=jWq_lce-ihOz07D1kwj5ctPzElYexNCjJ-xSv-pK1CI,37172
 bisheng_langchain/vectorstores/retriever.py,sha256=fNtk8qSwBo2Qrlt1NpZVXaNATW2tBywkyS0q0NtN5MI,4326
-bisheng_langchain-1.
-bisheng_langchain-1.
-bisheng_langchain-1.
-bisheng_langchain-1.
+bisheng_langchain-1.3.0.dist-info/METADATA,sha256=Osl1j2o-wDqBmnqjsqVEWW-sNmiNHr-mTmRpHYbFy_g,2435
+bisheng_langchain-1.3.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+bisheng_langchain-1.3.0.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+bisheng_langchain-1.3.0.dist-info/RECORD,,

{bisheng_langchain-1.2.0.dev1.dist-info → bisheng_langchain-1.3.0.dist-info}/top_level.txt: file without changes