MindsDB 25.8.2.0__py3-none-any.whl → 25.8.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +3 -1
- mindsdb/api/a2a/agent.py +6 -16
- mindsdb/api/a2a/common/types.py +3 -4
- mindsdb/api/a2a/task_manager.py +24 -35
- mindsdb/api/a2a/utils.py +63 -0
- mindsdb/api/http/namespaces/agents.py +6 -7
- mindsdb/interfaces/agents/constants.py +16 -2
- mindsdb/interfaces/agents/langchain_agent.py +83 -18
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +7 -1
- mindsdb/interfaces/skills/skill_tool.py +7 -1
- mindsdb/interfaces/skills/sql_agent.py +6 -2
- mindsdb/utilities/fs.py +10 -4
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.8.3.0.dist-info}/METADATA +250 -250
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.8.3.0.dist-info}/RECORD +18 -18
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.8.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.8.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.8.3.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py
CHANGED
@@ -1,6 +1,6 @@
 __title__ = "MindsDB"
 __package_name__ = "mindsdb"
-__version__ = "25.8.2.0"
+__version__ = "25.8.3.0"
 __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
 __email__ = "jorge@mindsdb.com"
 __author__ = "MindsDB Inc"
mindsdb/__main__.py
CHANGED
@@ -134,6 +134,9 @@ class TrunkProcessData:
 
 def close_api_gracefully(trunc_processes_struct):
     _stop_event.set()
+
+    delete_pid_file()
+
     try:
         for trunc_processes_data in trunc_processes_struct.values():
            process = trunc_processes_data.process
@@ -600,7 +603,6 @@ if __name__ == "__main__":
            ],
            return_exceptions=False,
        )
-       delete_pid_file()
 
    ioloop = asyncio.new_event_loop()
    ioloop.run_until_complete(wait_apis_start())
mindsdb/api/a2a/agent.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Any, AsyncIterable, Dict, List
 import requests
 import logging
 import httpx
-from mindsdb.api.a2a.utils import to_serializable
+from mindsdb.api.a2a.utils import to_serializable, convert_a2a_message_to_qa_format
 from mindsdb.api.a2a.constants import DEFAULT_STREAM_TIMEOUT
 
 logger = logging.getLogger(__name__)
@@ -117,22 +117,12 @@ class MindsDBAgent:
         """Stream responses from the MindsDB agent (uses streaming API endpoint)."""
         try:
             logger.info(f"Using streaming API for query: {query[:100]}...")
-
+            # Create A2A message structure with history and current query
+            a2a_message = {"role": "user", "parts": [{"text": query}]}
             if history:
-
-
-
-                    text = ""
-                    for part in msg_dict.get("parts", []):
-                        if part.get("type") == "text":
-                            text = part.get("text", "")
-                            break
-                    if text:
-                        if role == "user":
-                            formatted_messages.append({"question": text, "answer": None})
-                        elif role == "assistant" and formatted_messages:
-                            formatted_messages[-1]["answer"] = text
-            formatted_messages.append({"question": query, "answer": None})
+                a2a_message["history"] = history
+            # Convert to Q&A format using centralized utility
+            formatted_messages = convert_a2a_message_to_qa_format(a2a_message)
             logger.debug(f"Formatted messages for agent: {formatted_messages}")
             streaming_response = self.streaming_invoke(formatted_messages, timeout=timeout)
             async for chunk in streaming_response:
mindsdb/api/a2a/common/types.py
CHANGED
@@ -35,9 +35,7 @@ class FileContent(BaseModel):
         if not (self.bytes or self.uri):
             raise ValueError("Either 'bytes' or 'uri' must be present in the file data")
         if self.bytes and self.uri:
-            raise ValueError(
-                "Only one of 'bytes' or 'uri' can be present in the file data"
-            )
+            raise ValueError("Only one of 'bytes' or 'uri' can be present in the file data")
         return self
 
 
@@ -57,9 +55,10 @@ Part = Annotated[Union[TextPart, FilePart, DataPart], Field(discriminator="type")]
 
 
 class Message(BaseModel):
-    role: Literal["user", "agent"]
+    role: Literal["user", "agent", "assistant"]
     parts: List[Part]
     metadata: dict[str, Any] | None = None
+    history: Optional[List["Message"]] = None
 
 
 class TaskStatus(BaseModel):
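With `role` widened to accept "assistant" and the new optional `history` field, a message can now carry prior turns inline. A minimal sketch of constructing such a message, assuming `TextPart` takes a `text` keyword as in the standard A2A part models:

from mindsdb.api.a2a.common.types import Message, TextPart

# Prior turns; "assistant" is now an accepted role alongside "agent"
prior = [
    Message(role="user", parts=[TextPart(text="Hello")]),
    Message(role="assistant", parts=[TextPart(text="Hi! How can I help?")]),
]

# Current message with its history nested inline via the new field
msg = Message(
    role="user",
    parts=[TextPart(text="List my tables")],
    history=prior,
)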
mindsdb/api/a2a/task_manager.py
CHANGED
@@ -18,7 +18,7 @@ from mindsdb.api.a2a.common.types import (
 )
 from mindsdb.api.a2a.common.server.task_manager import InMemoryTaskManager
 from mindsdb.api.a2a.agent import MindsDBAgent
-from mindsdb.api.a2a.utils import to_serializable
+from mindsdb.api.a2a.utils import to_serializable, convert_a2a_message_to_qa_format
 
 from typing import Union
 import logging
@@ -94,22 +94,8 @@ class AgentTaskManager(InMemoryTaskManager):
 
         agent = self._create_agent(agent_name)
 
-        # Get the history from the task
+        # Get the history from the task object (where it was properly extracted and stored)
         history = task.history if task and task.history else []
-        logger.info(f"Using history with length {len(history)} for request")
-
-        # Log the history for debugging
-        logger.info(f"Conversation history for task {task_send_params.id}:")
-        for idx, msg in enumerate(history):
-            # Convert Message object to dict if needed
-            msg_dict = msg.dict() if hasattr(msg, "dict") else msg
-            role = msg_dict.get("role", "unknown")
-            text = ""
-            for part in msg_dict.get("parts", []):
-                if part.get("type") == "text":
-                    text = part.get("text", "")
-                    break
-            logger.info(f"Message {idx + 1} ({role}): {text[:100]}...")
 
         if not streaming:
             # If streaming is disabled, use invoke and return a single response
@@ -183,17 +169,16 @@ class AgentTaskManager(InMemoryTaskManager):
         # If streaming is enabled (default), use the streaming implementation
         try:
             logger.debug(f"[TaskManager] Entering agent.stream() at {time.time()}")
-            #
-
-
-
-
-
-
-
-            )
-            async for item in agent.streaming_invoke(agent_messages, timeout=60):
+            # Create A2A message structure and convert using centralized utility
+            a2a_message = task_send_params.message.model_dump()
+            if history:
+                a2a_message["history"] = [msg.model_dump() if hasattr(msg, "model_dump") else msg for msg in history]
+
+            # Convert to Q&A format using centralized utility function
+            all_messages = convert_a2a_message_to_qa_format(a2a_message)
+
+            logger.debug(f"Sending {len(all_messages)} total messages to streaming agent")
+            async for item in agent.streaming_invoke(all_messages, timeout=60):
                 # Clean up: Remove verbose debug logs, keep only errors and essential info
                 if isinstance(item, dict) and "artifact" in item and "parts" in item["artifact"]:
                     item["artifact"]["parts"] = [to_serializable(p) for p in item["artifact"]["parts"]]
@@ -235,19 +220,23 @@ class AgentTaskManager(InMemoryTaskManager):
         message = task_send_params.message
         message_dict = message.dict() if hasattr(message, "dict") else message
 
-        # Get history from request if available
+        # Get history from request if available - check both locations
         history = []
+
+        # First check if history is at top level (task_send_params.history)
         if hasattr(task_send_params, "history") and task_send_params.history:
-            # Convert each history item to dict if needed
+            # Convert each history item to dict if needed
             for item in task_send_params.history:
-                item_dict = item.
-
-
-
+                item_dict = item.model_dump() if hasattr(item, "model_dump") else item
+                history.append(item_dict)
+        # Also check if history is nested under message (message.history)
+        elif hasattr(task_send_params.message, "history") and task_send_params.message.history:
+            for item in task_send_params.message.history:
+                item_dict = item.model_dump() if hasattr(item, "model_dump") else item
                 history.append(item_dict)
 
-        #
-
+        # DO NOT add current message to history - it should be processed separately
+        # The current message will be extracted during streaming from task_send_params.message
 
         # Create a new task
         task = Task(
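The rewritten extraction accepts history in either of two places on the incoming request. A sketch of the two accepted payload shapes (field names from the hunks above; the values are illustrative):

# Shape 1: history at the top level of the task params
params_top_level = {
    "id": "task-1",
    "message": {"role": "user", "parts": [{"type": "text", "text": "List my tables"}]},
    "history": [
        {"role": "user", "parts": [{"type": "text", "text": "Hello"}]},
        {"role": "agent", "parts": [{"type": "text", "text": "Hi! How can I help?"}]},
    ],
}

# Shape 2: history nested under the message itself (the new Message.history field)
params_nested = {
    "id": "task-2",
    "message": {
        "role": "user",
        "parts": [{"type": "text", "text": "List my tables"}],
        "history": [
            {"role": "user", "parts": [{"type": "text", "text": "Hello"}]},
            {"role": "agent", "parts": [{"type": "text", "text": "Hi! How can I help?"}]},
        ],
    },
}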
mindsdb/api/a2a/utils.py
CHANGED
@@ -1,3 +1,9 @@
+from typing import Dict, List
+from mindsdb.utilities.log import getLogger
+
+logger = getLogger(__name__)
+
+
 def to_serializable(obj):
     # Primitives
     if isinstance(obj, (str, int, float, bool, type(None))):
@@ -19,3 +25,60 @@ def to_serializable(obj):
     return [to_serializable(v) for v in obj]
     # Fallback: string
     return str(obj)
+
+
+def convert_a2a_message_to_qa_format(a2a_message: Dict) -> List[Dict[str, str]]:
+    """
+    Convert A2A message format to question/answer format.
+
+    This is the format that the langchain agent expects and ensure effective multi-turn conversation
+
+    Args:
+        a2a_message: A2A message containing history and current message parts
+
+    Returns:
+        List of messages in question/answer format
+    """
+    converted_messages = []
+
+    # Process conversation history first
+    if "history" in a2a_message:
+        for hist_msg in a2a_message["history"]:
+            if hist_msg.get("role") == "user":
+                # Extract text from parts
+                text = ""
+                for part in hist_msg.get("parts", []):
+                    if part.get("type") == "text":
+                        text = part.get("text", "")
+                        break
+                # Create question with empty answer initially
+                converted_messages.append({"question": text, "answer": ""})
+            elif hist_msg.get("role") in ["agent", "assistant"]:
+                # Extract text from parts
+                text = ""
+                for part in hist_msg.get("parts", []):
+                    if part.get("type") == "text":
+                        text = part.get("text", "")
+                        break
+                # Pair with the most recent question that has empty answer
+                paired = False
+                for i in range(len(converted_messages) - 1, -1, -1):
+                    if converted_messages[i].get("answer") == "":
+                        converted_messages[i]["answer"] = text
+                        paired = True
+                        break
+
+                if not paired:
+                    logger.warning("Could not pair agent response with question (no empty answer found)")
+
+        logger.debug(f"Converted {len(a2a_message['history'])} A2A history messages to Q&A format")
+
+    # Add current message as final question with empty answer
+    current_text = ""
+    for part in a2a_message.get("parts", []):
+        if part.get("type") == "text":
+            current_text = part.get("text", "")
+            break
+    converted_messages.append({"question": current_text, "answer": ""})
+
+    return converted_messages
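A usage sketch of the new utility; the expected output is traced by hand from the function body above (note that parts are matched on "type": "text"):

from mindsdb.api.a2a.utils import convert_a2a_message_to_qa_format

a2a_message = {
    "role": "user",
    "parts": [{"type": "text", "text": "List my tables"}],
    "history": [
        {"role": "user", "parts": [{"type": "text", "text": "Hello"}]},
        {"role": "agent", "parts": [{"type": "text", "text": "Hi! How can I help?"}]},
    ],
}

qa = convert_a2a_message_to_qa_format(a2a_message)
# [{'question': 'Hello', 'answer': 'Hi! How can I help?'},
#  {'question': 'List my tables', 'answer': ''}]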
mindsdb/api/http/namespaces/agents.py
CHANGED

@@ -323,15 +323,16 @@ class AgentCompletionsStream(Resource):
     @ns_conf.doc("agent_completions_stream")
     @api_endpoint_metrics("POST", "/agents/agent/completions/stream")
     def post(self, project_name, agent_name):
-
-
-        # Check for required parameters.
+        # Extract messages from request (HTTP format only)
         if "messages" not in request.json:
-            logger.error("Missing 'messages' parameter in request body")
             return http_error(
-                HTTPStatus.BAD_REQUEST,
+                HTTPStatus.BAD_REQUEST,
+                "Missing parameter",
+                'Must provide "messages" parameter in POST body',
             )
 
+        messages = request.json["messages"]
+
         session = SessionController()
         try:
             existing_agent = session.agents_controller.get_agent(agent_name, project_name=project_name)
@@ -346,8 +347,6 @@ class AgentCompletionsStream(Resource):
                 HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
             )
 
-        messages = request.json["messages"]
-
         try:
             gen = _completion_event_generator(agent_name, messages, project_name)
             logger.info(f"Starting streaming response for agent {agent_name}")
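The handler now validates the body before any agent or project lookup, so a missing "messages" key fails fast with a 400. A hedged sketch of calling the endpoint with requests; the /api/projects/... URL prefix, the default port, and the Q&A message shape are assumptions, not shown in this diff:

import requests

# Assumed route layout and default HTTP port; names are placeholders
url = "http://127.0.0.1:47334/api/projects/mindsdb/agents/my_agent/completions/stream"

payload = {
    "messages": [
        {"question": "Hello", "answer": "Hi! How can I help?"},
        {"question": "List my tables", "answer": None},
    ]
}

# Omitting "messages" now returns 400 "Missing parameter" before the agent lookup
with requests.post(url, json=payload, stream=True, timeout=60) as resp:
    for line in resp.iter_lines():
        if line:
            print(line.decode())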
mindsdb/interfaces/agents/constants.py
CHANGED

@@ -212,7 +212,14 @@ DEFAULT_TIKTOKEN_MODEL_NAME = os.getenv("DEFAULT_TIKTOKEN_MODEL_NAME", "gpt-4")
 AGENT_CHUNK_POLLING_INTERVAL_SECONDS = os.getenv("AGENT_CHUNK_POLLING_INTERVAL_SECONDS", 1.0)
 DEFAULT_TEXT2SQL_DATABASE = "mindsdb"
 DEFAULT_AGENT_SYSTEM_PROMPT = """
-You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
+You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
+
+**CONVERSATION CONTEXT:**
+- You have access to previous messages in this conversation through your memory system
+- When users ask about previous questions, topics, or context, refer to the conversation history
+- Maintain conversational continuity and reference earlier parts of the conversation when relevant
+- When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
+- You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
 
 1. For factual questions about specific topics, use the knowledge base tools in this sequence:
    - First use kb_list_tool to see available knowledge bases
@@ -230,7 +237,14 @@ For factual questions, ALWAYS use the available tools to look up information rat
 
 """
 
-MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
+MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
+
+**CONVERSATION CONTEXT:**
+- You have access to previous messages in this conversation through your memory system
+- When users ask about previous questions, topics, or context, refer to the conversation history
+- Maintain conversational continuity and reference earlier parts of the conversation when relevant
+- When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
+- You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
 
 1. For questions about database tables and their contents:
    - Use the sql_db_query to query the tables directly
mindsdb/interfaces/agents/langchain_agent.py
CHANGED

@@ -7,6 +7,7 @@ import re
 import threading
 import numpy as np
 import pandas as pd
+import logging
 
 from langchain.agents import AgentExecutor
 from langchain.agents.initialize import initialize_agent
@@ -16,6 +17,7 @@ from langchain_writer import ChatWriter
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_core.agents import AgentAction, AgentStep
 from langchain_core.callbacks.base import BaseCallbackHandler
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
 
 from langchain_nvidia_ai_endpoints import ChatNVIDIA
 from langchain_core.messages.base import BaseMessage
@@ -63,7 +65,6 @@ from mindsdb.interfaces.agents.constants import (
 )
 from mindsdb.interfaces.skills.skill_tool import skill_tool, SkillData
 from langchain_anthropic import ChatAnthropic
-from langchain_core.messages import SystemMessage
 from langchain_openai import ChatOpenAI
 
 from mindsdb.utilities.langfuse import LangfuseClientWrapper
@@ -297,6 +298,11 @@ class LangchainAgent:
         if "prompt_template" in args:
             logger.info(f"Using prompt template: {args['prompt_template'][:50]}...")
 
+        if "model_name" not in args:
+            raise ValueError(
+                "No model name provided for agent. Provide it in the model parameter or in the default model setup."
+            )
+
         return args
 
     def get_metadata(self) -> Dict:
@@ -345,15 +351,20 @@ class LangchainAgent:
         args.update(params or {})
 
         df = pd.DataFrame(messages)
+        logger.info(f"LangchainAgent.get_completion: Received {len(messages)} messages")
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(f"Messages DataFrame shape: {df.shape}")
+            logger.debug(f"Messages DataFrame columns: {df.columns.tolist()}")
+            logger.debug(f"Messages DataFrame content: {df.to_dict('records')}")
 
         # Back compatibility for old models
         self.provider = args.get("provider", get_llm_provider(args))
 
         df = df.reset_index(drop=True)
         agent = self.create_agent(df)
-        #
-
-
+        # Keep conversation history for context - don't nullify previous messages
+
+        # Only use the last message as the current prompt, but preserve history for agent memory
         response = self.run_agent(df, agent, args)
 
         # End the run completion span and update the metadata with tool usage
@@ -374,6 +385,12 @@ class LangchainAgent:
         args = self.args
 
         df = pd.DataFrame(messages)
+        logger.info(f"LangchainAgent._get_completion_stream: Received {len(messages)} messages")
+        # Check if we have the expected columns for conversation history
+        if "question" in df.columns and "answer" in df.columns:
+            logger.debug("DataFrame has question/answer columns for conversation history")
+        else:
+            logger.warning("DataFrame missing question/answer columns! Available columns: {df.columns.tolist()}")
 
         self.embedding_model_provider = args.get("embedding_model_provider", get_embedding_model_provider(args))
         # Back compatibility for old models
@@ -381,9 +398,8 @@ class LangchainAgent:
 
         df = df.reset_index(drop=True)
         agent = self.create_agent(df)
-        #
-
-        df.iloc[:-1, df.columns.get_loc(user_column)] = None
+        # Keep conversation history for context - don't nullify previous messages
+        # Only use the last message as the current prompt, but preserve history for agent memory
         return self.stream_agent(df, agent, args)
 
     def create_agent(self, df: pd.DataFrame) -> AgentExecutor:
@@ -403,7 +419,8 @@ class LangchainAgent:
         # Prefer prediction prompt template over original if provided.
         prompt_template = args["prompt_template"]
 
-        #
+        # Modern LangChain approach: Use memory but populate it correctly
+        # Create memory and populate with conversation history
         memory = ConversationSummaryBufferMemory(
             llm=llm,
             input_key="input",
@@ -412,17 +429,41 @@ class LangchainAgent:
             memory_key="chat_history",
         )
 
+        # Add system message first
         memory.chat_memory.messages.insert(0, SystemMessage(content=prompt_template))
-
+
         user_column = args.get("user_column", USER_COLUMN)
         assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
-
-
-
+
+        logger.info(f"Processing conversation history: {len(df)} total messages, {len(df[:-1])} history messages")
+        logger.debug(f"User column: {user_column}, Assistant column: {assistant_column}")
+
+        # Process history messages (all except the last one which is current message)
+        history_df = df[:-1]
+        if len(history_df) == 0:
+            logger.debug("No history rows to process - this is normal for first message")
+
+        history_count = 0
+        for i, row in enumerate(history_df.to_dict("records")):
+            question = row.get(user_column)
+            answer = row.get(assistant_column)
+            logger.debug(f"Converting history row {i}: question='{question}', answer='{answer}'")
+
+            # Add messages directly to memory's chat_memory.messages list (modern approach)
            if isinstance(question, str) and len(question) > 0:
-                memory.chat_memory.
+                memory.chat_memory.messages.append(HumanMessage(content=question))
+                history_count += 1
+                logger.debug(f"Added HumanMessage to memory: {question}")
            if isinstance(answer, str) and len(answer) > 0:
-                memory.chat_memory.
+                memory.chat_memory.messages.append(AIMessage(content=answer))
+                history_count += 1
+                logger.debug(f"Added AIMessage to memory: {answer}")
+
+        logger.info(f"Built conversation history with {history_count} history messages + system message")
+        logger.debug(f"Final memory messages count: {len(memory.chat_memory.messages)}")
+
+        # Store memory for agent use
+        self._conversation_memory = memory
 
         agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
         agent_executor = initialize_agent(
@@ -562,7 +603,22 @@ AI: {response}"""
             return {CONTEXT_COLUMN: [], ASSISTANT_COLUMN: ""}
         try:
             callbacks, context_callback = prepare_callbacks(self, args)
-
+
+            # Modern LangChain approach: Include conversation history + current message
+            if hasattr(self, "_conversation_messages") and self._conversation_messages:
+                # Add current user message to conversation history
+                full_messages = self._conversation_messages + [HumanMessage(content=prompt)]
+                logger.critical(f"🔍 INVOKING AGENT with {len(full_messages)} messages (including history)")
+                logger.debug(
+                    f"Full conversation messages: {[type(msg).__name__ + ': ' + msg.content[:100] + '...' for msg in full_messages]}"
+                )
+
+                # For agents, we need to pass the input in the expected format
+                # The agent expects 'input' key with the current question, but conversation history should be in memory
+                result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
+            else:
+                logger.warning("No conversation messages found - using simple prompt")
+                result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
             captured_context = context_callback.get_contexts()
             output = result["output"] if isinstance(result, dict) and "output" in result else str(result)
             return {CONTEXT_COLUMN: captured_context, ASSISTANT_COLUMN: output}
@@ -585,7 +641,14 @@ AI: {response}"""
         agent_timeout_seconds = args.get("timeout", DEFAULT_AGENT_TIMEOUT_SECONDS)
 
         with ContextThreadPoolExecutor(max_workers=max_workers) as executor:
-
+            # Only process the last prompt (current question), not all prompts
+            # The previous prompts are conversation history and should only be used for context
+            if prompts:
+                current_prompt = prompts[-1]  # Last prompt is the current question
+                futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, current_prompt)]
+            else:
+                logger.error("No prompts found to process")
+                futures = []
             try:
                 for future in as_completed(futures, timeout=agent_timeout_seconds):
                     result = future.result()
@@ -686,12 +749,14 @@ AI: {response}"""
 
         callbacks, context_callback = prepare_callbacks(self, args)
 
-
+        # Use last prompt (current question) instead of first prompt (history)
+        current_prompt = prompts[-1] if prompts else ""
+        yield self.add_chunk_metadata({"type": "start", "prompt": current_prompt})
 
         if not hasattr(agent_executor, "stream") or not callable(agent_executor.stream):
             raise AttributeError("The agent_executor does not have a 'stream' method")
 
-        stream_iterator = self._stream_agent_executor(agent_executor,
+        stream_iterator = self._stream_agent_executor(agent_executor, current_prompt, callbacks)
         for chunk in stream_iterator:
             yield chunk
 
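The memory changes reduce to one idea: every Q&A row except the last becomes a HumanMessage/AIMessage pair in memory, and the last row is the current prompt. A standalone sketch of that conversion (pandas and langchain-core only; no LLM needed to see the shape):

import pandas as pd
from langchain_core.messages import AIMessage, HumanMessage

df = pd.DataFrame([
    {"question": "Hello", "answer": "Hi! How can I help?"},
    {"question": "List my tables", "answer": ""},
])

# All rows except the last are prior turns; the last row is the current prompt
history = []
for row in df[:-1].to_dict("records"):
    question, answer = row.get("question"), row.get("answer")
    if isinstance(question, str) and question:
        history.append(HumanMessage(content=question))
    if isinstance(answer, str) and answer:
        history.append(AIMessage(content=answer))

current_prompt = df.iloc[-1]["question"]
# history        -> [HumanMessage('Hello'), AIMessage('Hi! How can I help?')]
# current_prompt -> 'List my tables'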
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py
CHANGED

@@ -15,6 +15,7 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
 
 
 class MindsDBSQLToolkit(SQLDatabaseToolkit):
+    include_tables_tools: bool = True
     include_knowledge_base_tools: bool = True
 
     def get_tools(self, prefix="") -> List[BaseTool]:
@@ -212,8 +213,13 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
         )
 
         # Return standard SQL tools and knowledge base tools
-
+        kb_tools = [
             kb_list_tool,
             kb_info_tool,
             kb_query_tool,
         ]
+
+        if not self.include_tables_tools:
+            return kb_tools
+        else:
+            return sql_tools + kb_tools
mindsdb/interfaces/skills/skill_tool.py
CHANGED

@@ -347,7 +347,13 @@ class SkillToolController:
         )
         db = MindsDBSQL.custom_init(sql_agent=sql_agent)
         should_include_kb_tools = include_knowledge_bases is not None and len(include_knowledge_bases) > 0
-
+        should_include_tables_tools = len(databases_struct) > 0 or len(tables_list) > 0
+        toolkit = MindsDBSQLToolkit(
+            db=db,
+            llm=llm,
+            include_tables_tools=should_include_tables_tools,
+            include_knowledge_base_tools=should_include_kb_tools,
+        )
         return toolkit.get_tools()
 
     def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
mindsdb/interfaces/skills/sql_agent.py
CHANGED

@@ -405,6 +405,7 @@ class SQLAgent:
             tables_idx[tuple(table.parts)] = table
 
         tables = []
+        not_found = []
         for table_name in table_names:
             if not table_name.strip():
                 continue
@@ -419,9 +420,12 @@ class SQLAgent:
             table_identifier = tables_idx.get(tuple(table_parts))
 
             if table_identifier is None:
-
-
+                not_found.append(table_name)
+            else:
+                tables.append(table_identifier)
 
+        if not_found:
+            raise ValueError(f"Tables: {', '.join(not_found)} not found in the database")
         return tables
 
     def get_knowledge_base_info(self, kb_names: Optional[List[str]] = None) -> str:
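Instead of failing on the first unknown table, resolution now collects every miss and raises once with the full list. A standalone sketch of the pattern (names are illustrative, not the SQLAgent API):

from typing import Dict, List


def resolve_tables(table_names: List[str], tables_idx: Dict[str, str]) -> List[str]:
    """Resolve every requested table, then report all misses in one error."""
    tables: List[str] = []
    not_found: List[str] = []
    for name in table_names:
        if not name.strip():
            continue
        identifier = tables_idx.get(name)
        if identifier is None:
            not_found.append(name)
        else:
            tables.append(identifier)
    if not_found:
        raise ValueError(f"Tables: {', '.join(not_found)} not found in the database")
    return tables


# resolve_tables(["a", "x", "y"], {"a": "db.a"}) raises:
# ValueError: Tables: x, y not found in the database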
mindsdb/utilities/fs.py
CHANGED
@@ -133,6 +133,9 @@ def create_pid_file():
     Create mindsdb process pid file. Check if previous process exists and is running
     """
 
+    if os.environ.get("USE_PIDFILE") != "1":
+        return
+
     p = get_tmp_dir()
     p.mkdir(parents=True, exist_ok=True)
     pid_file = p.joinpath("pid")
@@ -141,11 +144,11 @@ def create_pid_file():
         pid = pid_file.read_text().strip()
         try:
             psutil.Process(int(pid))
-            raise Exception(f"Found PID file with existing process: {pid}")
+            raise Exception(f"Found PID file with existing process: {pid} {pid_file}")
         except (psutil.Error, ValueError):
             ...
 
-        logger.warning(f"Found existing PID file ({pid}), removing")
+        logger.warning(f"Found existing PID file {pid_file}({pid}), removing")
         pid_file.unlink()
 
     pid_file.write_text(str(os.getpid()))
@@ -155,15 +158,18 @@ def delete_pid_file():
     """
     Remove existing process pid file if it matches current process
     """
+
+    if os.environ.get("USE_PIDFILE") != "1":
+        return
+
     pid_file = get_tmp_dir().joinpath("pid")
 
     if not pid_file.exists():
-        logger.warning("Mindsdb PID file does not exist")
         return
 
     pid = pid_file.read_text().strip()
     if pid != str(os.getpid()):
-        logger.warning("Process id in PID file doesn't match mindsdb pid")
+        logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid")
         return
 
     pid_file.unlink()
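Both PID-file helpers are now opt-in: unless USE_PIDFILE=1 is set in the environment they return immediately. A quick sketch of the gate:

import os
from mindsdb.utilities.fs import create_pid_file, delete_pid_file

# Default: both calls return immediately, so no PID file is written or checked
create_pid_file()

# Opt in explicitly to restore the previous behavior
os.environ["USE_PIDFILE"] = "1"
create_pid_file()   # writes the PID file, raising if a live process already owns it
delete_pid_file()   # removes it again if it belongs to the current process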