MindsDB 25.7.4.0__py3-none-any.whl → 25.8.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +13 -1
- mindsdb/api/a2a/agent.py +6 -16
- mindsdb/api/a2a/common/types.py +3 -4
- mindsdb/api/a2a/task_manager.py +24 -35
- mindsdb/api/a2a/utils.py +63 -0
- mindsdb/api/executor/command_executor.py +9 -15
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +2 -1
- mindsdb/api/http/namespaces/agents.py +6 -7
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -2
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/salesforce_handler/constants.py +9 -2
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +83 -45
- mindsdb/interfaces/agents/constants.py +16 -3
- mindsdb/interfaces/agents/langchain_agent.py +84 -21
- mindsdb/interfaces/database/projects.py +111 -7
- mindsdb/interfaces/knowledge_base/controller.py +7 -1
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +14 -15
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +7 -1
- mindsdb/interfaces/skills/skill_tool.py +7 -1
- mindsdb/interfaces/skills/sql_agent.py +6 -2
- mindsdb/utilities/config.py +2 -0
- mindsdb/utilities/fs.py +60 -17
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/METADATA +277 -262
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/RECORD +57 -56
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/top_level.txt +0 -0
|
@@ -145,11 +145,60 @@ class AgentsController:
|
|
|
145
145
|
|
|
146
146
|
return all_agents.all()
|
|
147
147
|
|
|
148
|
+
def _create_default_sql_skill(
|
|
149
|
+
self,
|
|
150
|
+
name,
|
|
151
|
+
project_name,
|
|
152
|
+
include_tables: List[str] = None,
|
|
153
|
+
include_knowledge_bases: List[str] = None,
|
|
154
|
+
):
|
|
155
|
+
# Create a default SQL skill
|
|
156
|
+
skill_name = f"{name}_sql_skill"
|
|
157
|
+
skill_params = {
|
|
158
|
+
"type": "sql",
|
|
159
|
+
"description": f"Auto-generated SQL skill for agent {name}",
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
# Add restrictions provided
|
|
163
|
+
if include_tables:
|
|
164
|
+
skill_params["include_tables"] = include_tables
|
|
165
|
+
if include_knowledge_bases:
|
|
166
|
+
skill_params["include_knowledge_bases"] = include_knowledge_bases
|
|
167
|
+
|
|
168
|
+
try:
|
|
169
|
+
# Check if skill already exists
|
|
170
|
+
existing_skill = self.skills_controller.get_skill(skill_name, project_name)
|
|
171
|
+
if existing_skill is None:
|
|
172
|
+
# Create the skill
|
|
173
|
+
skill_type = skill_params.pop("type")
|
|
174
|
+
self.skills_controller.add_skill(
|
|
175
|
+
name=skill_name, project_name=project_name, type=skill_type, params=skill_params
|
|
176
|
+
)
|
|
177
|
+
else:
|
|
178
|
+
# Update the skill if parameters have changed
|
|
179
|
+
params_changed = False
|
|
180
|
+
|
|
181
|
+
# Check if skill parameters need to be updated
|
|
182
|
+
for param_key, param_value in skill_params.items():
|
|
183
|
+
if existing_skill.params.get(param_key) != param_value:
|
|
184
|
+
existing_skill.params[param_key] = param_value
|
|
185
|
+
params_changed = True
|
|
186
|
+
|
|
187
|
+
# Update the skill if needed
|
|
188
|
+
if params_changed:
|
|
189
|
+
flag_modified(existing_skill, "params")
|
|
190
|
+
db.session.commit()
|
|
191
|
+
|
|
192
|
+
except Exception as e:
|
|
193
|
+
raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
|
|
194
|
+
|
|
195
|
+
return skill_name
|
|
196
|
+
|
|
148
197
|
def add_agent(
|
|
149
198
|
self,
|
|
150
199
|
name: str,
|
|
151
200
|
project_name: str = None,
|
|
152
|
-
model_name: str = None,
|
|
201
|
+
model_name: Union[str, dict] = None,
|
|
153
202
|
skills: List[Union[str, dict]] = None,
|
|
154
203
|
provider: str = None,
|
|
155
204
|
params: Dict[str, Any] = None,
|
|
@@ -256,46 +305,13 @@ class AgentsController:
|
|
|
256
305
|
|
|
257
306
|
# Auto-create SQL skill if no skills are provided but include_tables or include_knowledge_bases params are provided
|
|
258
307
|
if not skills and (include_tables or include_knowledge_bases):
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
# Add restrictions provided
|
|
267
|
-
if include_tables:
|
|
268
|
-
skill_params["include_tables"] = include_tables
|
|
269
|
-
if include_knowledge_bases:
|
|
270
|
-
skill_params["include_knowledge_bases"] = include_knowledge_bases
|
|
271
|
-
|
|
272
|
-
try:
|
|
273
|
-
# Check if skill already exists
|
|
274
|
-
existing_skill = self.skills_controller.get_skill(skill_name, project_name)
|
|
275
|
-
if existing_skill is None:
|
|
276
|
-
# Create the skill
|
|
277
|
-
skill_type = skill_params.pop("type")
|
|
278
|
-
self.skills_controller.add_skill(
|
|
279
|
-
name=skill_name, project_name=project_name, type=skill_type, params=skill_params
|
|
280
|
-
)
|
|
281
|
-
else:
|
|
282
|
-
# Update the skill if parameters have changed
|
|
283
|
-
params_changed = False
|
|
284
|
-
|
|
285
|
-
# Check if skill parameters need to be updated
|
|
286
|
-
for param_key, param_value in skill_params.items():
|
|
287
|
-
if existing_skill.params.get(param_key) != param_value:
|
|
288
|
-
existing_skill.params[param_key] = param_value
|
|
289
|
-
params_changed = True
|
|
290
|
-
|
|
291
|
-
# Update the skill if needed
|
|
292
|
-
if params_changed:
|
|
293
|
-
flag_modified(existing_skill, "params")
|
|
294
|
-
db.session.commit()
|
|
295
|
-
|
|
296
|
-
skills = [skill_name]
|
|
297
|
-
except Exception as e:
|
|
298
|
-
raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
|
|
308
|
+
skill = self._create_default_sql_skill(
|
|
309
|
+
name,
|
|
310
|
+
project_name,
|
|
311
|
+
include_tables=include_tables,
|
|
312
|
+
include_knowledge_bases=include_knowledge_bases,
|
|
313
|
+
)
|
|
314
|
+
skills = [skill]
|
|
299
315
|
|
|
300
316
|
agent = db.Agents(
|
|
301
317
|
name=name,
|
|
@@ -351,7 +367,7 @@ class AgentsController:
|
|
|
351
367
|
agent_name: str,
|
|
352
368
|
project_name: str = default_project,
|
|
353
369
|
name: str = None,
|
|
354
|
-
model_name: str = None,
|
|
370
|
+
model_name: Union[str, dict] = None,
|
|
355
371
|
skills_to_add: List[Union[str, dict]] = None,
|
|
356
372
|
skills_to_remove: List[str] = None,
|
|
357
373
|
skills_to_rewrite: List[Union[str, dict]] = None,
|
|
@@ -365,7 +381,7 @@ class AgentsController:
|
|
|
365
381
|
agent_name (str): The name of the new agent, or existing agent to update
|
|
366
382
|
project_name (str): The containing project
|
|
367
383
|
name (str): The updated name of the agent
|
|
368
|
-
model_name (str): The name of the existing ML model the agent will use
|
|
384
|
+
model_name (str | dict): The name of the existing ML model the agent will use
|
|
369
385
|
skills_to_add (List[Union[str, dict]]): List of skill names to add to the agent, or list of dicts
|
|
370
386
|
with one of keys is "name", and other is additional parameters for relationship agent<>skill
|
|
371
387
|
skills_to_remove (List[str]): List of skill names to remove from the agent
|
|
@@ -394,6 +410,8 @@ class AgentsController:
|
|
|
394
410
|
existing_agent = self.get_agent(agent_name, project_name=project_name)
|
|
395
411
|
if existing_agent is None:
|
|
396
412
|
raise EntityNotExistsError(f"Agent with name not found: {agent_name}")
|
|
413
|
+
existing_params = existing_agent.params or {}
|
|
414
|
+
|
|
397
415
|
is_demo = (existing_agent.params or {}).get("is_demo", False)
|
|
398
416
|
if is_demo and (
|
|
399
417
|
(name is not None and name != agent_name)
|
|
@@ -413,12 +431,34 @@ class AgentsController:
|
|
|
413
431
|
existing_agent.name = name
|
|
414
432
|
|
|
415
433
|
if model_name or provider:
|
|
434
|
+
if isinstance(model_name, dict):
|
|
435
|
+
# move into params
|
|
436
|
+
existing_params["model"] = model_name
|
|
437
|
+
model_name = None
|
|
438
|
+
|
|
416
439
|
# check model and provider
|
|
417
440
|
model, provider = self.check_model_provider(model_name, provider)
|
|
418
441
|
# Update model and provider
|
|
419
442
|
existing_agent.model_name = model_name
|
|
420
443
|
existing_agent.provider = provider
|
|
421
444
|
|
|
445
|
+
if "data" in params:
|
|
446
|
+
if len(skills_to_add) > 0 or len(skills_to_remove) > 0:
|
|
447
|
+
raise ValueError(
|
|
448
|
+
"'data' parameter cannot be used with 'skills_to_remove' or 'skills_to_add' parameters"
|
|
449
|
+
)
|
|
450
|
+
|
|
451
|
+
include_knowledge_bases = params["data"].get("knowledge_bases")
|
|
452
|
+
include_tables = params["data"].get("tables")
|
|
453
|
+
|
|
454
|
+
skill = self._create_default_sql_skill(
|
|
455
|
+
agent_name,
|
|
456
|
+
project_name,
|
|
457
|
+
include_tables=include_tables,
|
|
458
|
+
include_knowledge_bases=include_knowledge_bases,
|
|
459
|
+
)
|
|
460
|
+
skills_to_rewrite = [{"name": skill}]
|
|
461
|
+
|
|
422
462
|
# check that all skills exist
|
|
423
463
|
skill_name_to_record_map = {}
|
|
424
464
|
for skill_meta in skills_to_add + skills_to_remove + skills_to_rewrite:
|
|
@@ -496,8 +536,6 @@ class AgentsController:
|
|
|
496
536
|
db.session.add(association)
|
|
497
537
|
|
|
498
538
|
if params is not None:
|
|
499
|
-
existing_params = existing_agent.params or {}
|
|
500
|
-
|
|
501
539
|
if params.get("data", {}).get("tables"):
|
|
502
540
|
new_table_entries = set(params["data"]["tables"]) - set(
|
|
503
541
|
existing_params.get("data", {}).get("tables", [])
|
|
@@ -26,7 +26,6 @@ OPEN_AI_CHAT_MODELS = (
|
|
|
26
26
|
SUPPORTED_PROVIDERS = {
|
|
27
27
|
"openai",
|
|
28
28
|
"anthropic",
|
|
29
|
-
"anyscale",
|
|
30
29
|
"litellm",
|
|
31
30
|
"ollama",
|
|
32
31
|
"nvidia_nim",
|
|
@@ -213,7 +212,14 @@ DEFAULT_TIKTOKEN_MODEL_NAME = os.getenv("DEFAULT_TIKTOKEN_MODEL_NAME", "gpt-4")
|
|
|
213
212
|
AGENT_CHUNK_POLLING_INTERVAL_SECONDS = os.getenv("AGENT_CHUNK_POLLING_INTERVAL_SECONDS", 1.0)
|
|
214
213
|
DEFAULT_TEXT2SQL_DATABASE = "mindsdb"
|
|
215
214
|
DEFAULT_AGENT_SYSTEM_PROMPT = """
|
|
216
|
-
You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
|
|
215
|
+
You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
|
|
216
|
+
|
|
217
|
+
**CONVERSATION CONTEXT:**
|
|
218
|
+
- You have access to previous messages in this conversation through your memory system
|
|
219
|
+
- When users ask about previous questions, topics, or context, refer to the conversation history
|
|
220
|
+
- Maintain conversational continuity and reference earlier parts of the conversation when relevant
|
|
221
|
+
- When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
|
|
222
|
+
- You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
|
|
217
223
|
|
|
218
224
|
1. For factual questions about specific topics, use the knowledge base tools in this sequence:
|
|
219
225
|
- First use kb_list_tool to see available knowledge bases
|
|
@@ -231,7 +237,14 @@ For factual questions, ALWAYS use the available tools to look up information rat
|
|
|
231
237
|
|
|
232
238
|
"""
|
|
233
239
|
|
|
234
|
-
MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
|
|
240
|
+
MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
|
|
241
|
+
|
|
242
|
+
**CONVERSATION CONTEXT:**
|
|
243
|
+
- You have access to previous messages in this conversation through your memory system
|
|
244
|
+
- When users ask about previous questions, topics, or context, refer to the conversation history
|
|
245
|
+
- Maintain conversational continuity and reference earlier parts of the conversation when relevant
|
|
246
|
+
- When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
|
|
247
|
+
- You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
|
|
235
248
|
|
|
236
249
|
1. For questions about database tables and their contents:
|
|
237
250
|
- Use the sql_db_query to query the tables directly
|
|
@@ -7,15 +7,17 @@ import re
|
|
|
7
7
|
import threading
|
|
8
8
|
import numpy as np
|
|
9
9
|
import pandas as pd
|
|
10
|
+
import logging
|
|
10
11
|
|
|
11
12
|
from langchain.agents import AgentExecutor
|
|
12
13
|
from langchain.agents.initialize import initialize_agent
|
|
13
14
|
from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
|
|
14
|
-
from langchain_community.chat_models import ChatAnyscale, ChatLiteLLM, ChatOllama
|
|
15
|
+
from langchain_community.chat_models import ChatLiteLLM, ChatOllama
|
|
15
16
|
from langchain_writer import ChatWriter
|
|
16
17
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
17
18
|
from langchain_core.agents import AgentAction, AgentStep
|
|
18
19
|
from langchain_core.callbacks.base import BaseCallbackHandler
|
|
20
|
+
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
|
19
21
|
|
|
20
22
|
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
|
21
23
|
from langchain_core.messages.base import BaseMessage
|
|
@@ -63,7 +65,6 @@ from mindsdb.interfaces.agents.constants import (
|
|
|
63
65
|
)
|
|
64
66
|
from mindsdb.interfaces.skills.skill_tool import skill_tool, SkillData
|
|
65
67
|
from langchain_anthropic import ChatAnthropic
|
|
66
|
-
from langchain_core.messages import SystemMessage
|
|
67
68
|
from langchain_openai import ChatOpenAI
|
|
68
69
|
|
|
69
70
|
from mindsdb.utilities.langfuse import LangfuseClientWrapper
|
|
@@ -165,8 +166,6 @@ def create_chat_model(args: Dict):
|
|
|
165
166
|
except NotImplementedError:
|
|
166
167
|
chat_open_ai.tiktoken_model_name = DEFAULT_TIKTOKEN_MODEL_NAME
|
|
167
168
|
return chat_open_ai
|
|
168
|
-
if args["provider"] == "anyscale":
|
|
169
|
-
return ChatAnyscale(**model_kwargs)
|
|
170
169
|
if args["provider"] == "litellm":
|
|
171
170
|
return ChatLiteLLM(**model_kwargs)
|
|
172
171
|
if args["provider"] == "ollama":
|
|
@@ -299,6 +298,11 @@ class LangchainAgent:
|
|
|
299
298
|
if "prompt_template" in args:
|
|
300
299
|
logger.info(f"Using prompt template: {args['prompt_template'][:50]}...")
|
|
301
300
|
|
|
301
|
+
if "model_name" not in args:
|
|
302
|
+
raise ValueError(
|
|
303
|
+
"No model name provided for agent. Provide it in the model parameter or in the default model setup."
|
|
304
|
+
)
|
|
305
|
+
|
|
302
306
|
return args
|
|
303
307
|
|
|
304
308
|
def get_metadata(self) -> Dict:
|
|
@@ -347,15 +351,20 @@ class LangchainAgent:
|
|
|
347
351
|
args.update(params or {})
|
|
348
352
|
|
|
349
353
|
df = pd.DataFrame(messages)
|
|
354
|
+
logger.info(f"LangchainAgent.get_completion: Received {len(messages)} messages")
|
|
355
|
+
if logger.isEnabledFor(logging.DEBUG):
|
|
356
|
+
logger.debug(f"Messages DataFrame shape: {df.shape}")
|
|
357
|
+
logger.debug(f"Messages DataFrame columns: {df.columns.tolist()}")
|
|
358
|
+
logger.debug(f"Messages DataFrame content: {df.to_dict('records')}")
|
|
350
359
|
|
|
351
360
|
# Back compatibility for old models
|
|
352
361
|
self.provider = args.get("provider", get_llm_provider(args))
|
|
353
362
|
|
|
354
363
|
df = df.reset_index(drop=True)
|
|
355
364
|
agent = self.create_agent(df)
|
|
356
|
-
#
|
|
357
|
-
|
|
358
|
-
|
|
365
|
+
# Keep conversation history for context - don't nullify previous messages
|
|
366
|
+
|
|
367
|
+
# Only use the last message as the current prompt, but preserve history for agent memory
|
|
359
368
|
response = self.run_agent(df, agent, args)
|
|
360
369
|
|
|
361
370
|
# End the run completion span and update the metadata with tool usage
|
|
@@ -376,6 +385,12 @@ class LangchainAgent:
|
|
|
376
385
|
args = self.args
|
|
377
386
|
|
|
378
387
|
df = pd.DataFrame(messages)
|
|
388
|
+
logger.info(f"LangchainAgent._get_completion_stream: Received {len(messages)} messages")
|
|
389
|
+
# Check if we have the expected columns for conversation history
|
|
390
|
+
if "question" in df.columns and "answer" in df.columns:
|
|
391
|
+
logger.debug("DataFrame has question/answer columns for conversation history")
|
|
392
|
+
else:
|
|
393
|
+
logger.warning("DataFrame missing question/answer columns! Available columns: {df.columns.tolist()}")
|
|
379
394
|
|
|
380
395
|
self.embedding_model_provider = args.get("embedding_model_provider", get_embedding_model_provider(args))
|
|
381
396
|
# Back compatibility for old models
|
|
@@ -383,9 +398,8 @@ class LangchainAgent:
|
|
|
383
398
|
|
|
384
399
|
df = df.reset_index(drop=True)
|
|
385
400
|
agent = self.create_agent(df)
|
|
386
|
-
#
|
|
387
|
-
|
|
388
|
-
df.iloc[:-1, df.columns.get_loc(user_column)] = None
|
|
401
|
+
# Keep conversation history for context - don't nullify previous messages
|
|
402
|
+
# Only use the last message as the current prompt, but preserve history for agent memory
|
|
389
403
|
return self.stream_agent(df, agent, args)
|
|
390
404
|
|
|
391
405
|
def create_agent(self, df: pd.DataFrame) -> AgentExecutor:
|
|
@@ -405,7 +419,8 @@ class LangchainAgent:
|
|
|
405
419
|
# Prefer prediction prompt template over original if provided.
|
|
406
420
|
prompt_template = args["prompt_template"]
|
|
407
421
|
|
|
408
|
-
#
|
|
422
|
+
# Modern LangChain approach: Use memory but populate it correctly
|
|
423
|
+
# Create memory and populate with conversation history
|
|
409
424
|
memory = ConversationSummaryBufferMemory(
|
|
410
425
|
llm=llm,
|
|
411
426
|
input_key="input",
|
|
@@ -414,17 +429,41 @@ class LangchainAgent:
|
|
|
414
429
|
memory_key="chat_history",
|
|
415
430
|
)
|
|
416
431
|
|
|
432
|
+
# Add system message first
|
|
417
433
|
memory.chat_memory.messages.insert(0, SystemMessage(content=prompt_template))
|
|
418
|
-
|
|
434
|
+
|
|
419
435
|
user_column = args.get("user_column", USER_COLUMN)
|
|
420
436
|
assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
437
|
+
|
|
438
|
+
logger.info(f"Processing conversation history: {len(df)} total messages, {len(df[:-1])} history messages")
|
|
439
|
+
logger.debug(f"User column: {user_column}, Assistant column: {assistant_column}")
|
|
440
|
+
|
|
441
|
+
# Process history messages (all except the last one which is current message)
|
|
442
|
+
history_df = df[:-1]
|
|
443
|
+
if len(history_df) == 0:
|
|
444
|
+
logger.debug("No history rows to process - this is normal for first message")
|
|
445
|
+
|
|
446
|
+
history_count = 0
|
|
447
|
+
for i, row in enumerate(history_df.to_dict("records")):
|
|
448
|
+
question = row.get(user_column)
|
|
449
|
+
answer = row.get(assistant_column)
|
|
450
|
+
logger.debug(f"Converting history row {i}: question='{question}', answer='{answer}'")
|
|
451
|
+
|
|
452
|
+
# Add messages directly to memory's chat_memory.messages list (modern approach)
|
|
424
453
|
if isinstance(question, str) and len(question) > 0:
|
|
425
|
-
memory.chat_memory.add_user_message(question)
|
|
454
|
+
memory.chat_memory.messages.append(HumanMessage(content=question))
|
|
455
|
+
history_count += 1
|
|
456
|
+
logger.debug(f"Added HumanMessage to memory: {question}")
|
|
426
457
|
if isinstance(answer, str) and len(answer) > 0:
|
|
427
|
-
memory.chat_memory.add_ai_message(answer)
|
|
458
|
+
memory.chat_memory.messages.append(AIMessage(content=answer))
|
|
459
|
+
history_count += 1
|
|
460
|
+
logger.debug(f"Added AIMessage to memory: {answer}")
|
|
461
|
+
|
|
462
|
+
logger.info(f"Built conversation history with {history_count} history messages + system message")
|
|
463
|
+
logger.debug(f"Final memory messages count: {len(memory.chat_memory.messages)}")
|
|
464
|
+
|
|
465
|
+
# Store memory for agent use
|
|
466
|
+
self._conversation_memory = memory
|
|
428
467
|
|
|
429
468
|
agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
|
|
430
469
|
agent_executor = initialize_agent(
|
|
@@ -564,7 +603,22 @@ AI: {response}"""
|
|
|
564
603
|
return {CONTEXT_COLUMN: [], ASSISTANT_COLUMN: ""}
|
|
565
604
|
try:
|
|
566
605
|
callbacks, context_callback = prepare_callbacks(self, args)
|
|
567
|
-
|
|
606
|
+
|
|
607
|
+
# Modern LangChain approach: Include conversation history + current message
|
|
608
|
+
if hasattr(self, "_conversation_messages") and self._conversation_messages:
|
|
609
|
+
# Add current user message to conversation history
|
|
610
|
+
full_messages = self._conversation_messages + [HumanMessage(content=prompt)]
|
|
611
|
+
logger.critical(f"🔍 INVOKING AGENT with {len(full_messages)} messages (including history)")
|
|
612
|
+
logger.debug(
|
|
613
|
+
f"Full conversation messages: {[type(msg).__name__ + ': ' + msg.content[:100] + '...' for msg in full_messages]}"
|
|
614
|
+
)
|
|
615
|
+
|
|
616
|
+
# For agents, we need to pass the input in the expected format
|
|
617
|
+
# The agent expects 'input' key with the current question, but conversation history should be in memory
|
|
618
|
+
result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
|
|
619
|
+
else:
|
|
620
|
+
logger.warning("No conversation messages found - using simple prompt")
|
|
621
|
+
result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
|
|
568
622
|
captured_context = context_callback.get_contexts()
|
|
569
623
|
output = result["output"] if isinstance(result, dict) and "output" in result else str(result)
|
|
570
624
|
return {CONTEXT_COLUMN: captured_context, ASSISTANT_COLUMN: output}
|
|
@@ -587,7 +641,14 @@ AI: {response}"""
|
|
|
587
641
|
agent_timeout_seconds = args.get("timeout", DEFAULT_AGENT_TIMEOUT_SECONDS)
|
|
588
642
|
|
|
589
643
|
with ContextThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
590
|
-
|
|
644
|
+
# Only process the last prompt (current question), not all prompts
|
|
645
|
+
# The previous prompts are conversation history and should only be used for context
|
|
646
|
+
if prompts:
|
|
647
|
+
current_prompt = prompts[-1] # Last prompt is the current question
|
|
648
|
+
futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, current_prompt)]
|
|
649
|
+
else:
|
|
650
|
+
logger.error("No prompts found to process")
|
|
651
|
+
futures = []
|
|
591
652
|
try:
|
|
592
653
|
for future in as_completed(futures, timeout=agent_timeout_seconds):
|
|
593
654
|
result = future.result()
|
|
@@ -688,12 +749,14 @@ AI: {response}"""
|
|
|
688
749
|
|
|
689
750
|
callbacks, context_callback = prepare_callbacks(self, args)
|
|
690
751
|
|
|
691
|
-
|
|
752
|
+
# Use last prompt (current question) instead of first prompt (history)
|
|
753
|
+
current_prompt = prompts[-1] if prompts else ""
|
|
754
|
+
yield self.add_chunk_metadata({"type": "start", "prompt": current_prompt})
|
|
692
755
|
|
|
693
756
|
if not hasattr(agent_executor, "stream") or not callable(agent_executor.stream):
|
|
694
757
|
raise AttributeError("The agent_executor does not have a 'stream' method")
|
|
695
758
|
|
|
696
|
-
stream_iterator = self._stream_agent_executor(agent_executor, prompts[0], callbacks)
|
|
759
|
+
stream_iterator = self._stream_agent_executor(agent_executor, current_prompt, callbacks)
|
|
697
760
|
for chunk in stream_iterator:
|
|
698
761
|
yield chunk
|
|
699
762
|
|
|
@@ -3,11 +3,12 @@ from copy import deepcopy
|
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
from collections import OrderedDict
|
|
5
5
|
|
|
6
|
+
import pandas as pd
|
|
6
7
|
import sqlalchemy as sa
|
|
7
8
|
import numpy as np
|
|
8
9
|
|
|
9
10
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
10
|
-
from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier
|
|
11
|
+
from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier, BinaryOperation
|
|
11
12
|
from mindsdb_sql_parser import parse_sql
|
|
12
13
|
|
|
13
14
|
from mindsdb.interfaces.storage import db
|
|
@@ -109,7 +110,19 @@ class Project:
|
|
|
109
110
|
"""
|
|
110
111
|
ViewController().delete(name, project_name=self.name, strict_case=strict_case)
|
|
111
112
|
|
|
112
|
-
def create_view(self, name: str, query: str):
|
|
113
|
+
def create_view(self, name: str, query: str, session):
|
|
114
|
+
ast_query = parse_sql(query)
|
|
115
|
+
|
|
116
|
+
if isinstance(ast_query, Select):
|
|
117
|
+
# check create view sql
|
|
118
|
+
ast_query.limit = Constant(1)
|
|
119
|
+
|
|
120
|
+
query_context_controller.set_context(query_context_controller.IGNORE_CONTEXT)
|
|
121
|
+
try:
|
|
122
|
+
SQLQuery(ast_query, session=session, database=self.name)
|
|
123
|
+
finally:
|
|
124
|
+
query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
|
|
125
|
+
|
|
113
126
|
ViewController().add(name, query=query, project_name=self.name)
|
|
114
127
|
|
|
115
128
|
def update_view(self, name: str, query: str, strict_case: bool = False):
|
|
@@ -124,21 +137,112 @@ class Project:
|
|
|
124
137
|
view_meta["query_ast"] = parse_sql(view_meta["query"])
|
|
125
138
|
return view_meta
|
|
126
139
|
|
|
127
|
-
|
|
140
|
+
@staticmethod
|
|
141
|
+
def combine_view_select(view_query: Select, query: Select) -> Select:
|
|
142
|
+
"""
|
|
143
|
+
Create a combined query from view's query and outer query.
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
# apply optimizations
|
|
147
|
+
if query.where is not None:
|
|
148
|
+
# Get conditions that can be duplicated into view's query
|
|
149
|
+
# It has to be simple condition with identifier and constant
|
|
150
|
+
# Also it shouldn't be under the OR condition
|
|
151
|
+
|
|
152
|
+
def get_conditions_to_move(node):
|
|
153
|
+
if not isinstance(node, BinaryOperation):
|
|
154
|
+
return []
|
|
155
|
+
op = node.op.upper()
|
|
156
|
+
if op == "AND":
|
|
157
|
+
conditions = []
|
|
158
|
+
conditions.extend(get_conditions_to_move(node.args[0]))
|
|
159
|
+
conditions.extend(get_conditions_to_move(node.args[1]))
|
|
160
|
+
return conditions
|
|
161
|
+
|
|
162
|
+
if op == "OR":
|
|
163
|
+
return []
|
|
164
|
+
if isinstance(node.args[0], (Identifier, Constant)) and isinstance(
|
|
165
|
+
node.args[1], (Identifier, Constant)
|
|
166
|
+
):
|
|
167
|
+
return [node]
|
|
168
|
+
|
|
169
|
+
conditions = get_conditions_to_move(query.where)
|
|
170
|
+
|
|
171
|
+
if conditions:
|
|
172
|
+
# analyse targets
|
|
173
|
+
# if target element has alias
|
|
174
|
+
# if element is not identifier or the name is not equal to alias:
|
|
175
|
+
# add alias to black list
|
|
176
|
+
# white list:
|
|
177
|
+
# all targets that are identifiers with no alias or equal to its alias
|
|
178
|
+
# condition can be moved if
|
|
179
|
+
# column is not in black list AND (query has star(*) OR column in white list)
|
|
180
|
+
|
|
181
|
+
has_star = False
|
|
182
|
+
white_list, black_list = [], []
|
|
183
|
+
for target in view_query.targets:
|
|
184
|
+
if isinstance(target, Star):
|
|
185
|
+
has_star = True
|
|
186
|
+
if isinstance(target, Identifier):
|
|
187
|
+
name = target.parts[-1].lower()
|
|
188
|
+
if target.alias is None or target.alias.parts[-1].lower() == name:
|
|
189
|
+
white_list.append(name)
|
|
190
|
+
elif target.alias is not None:
|
|
191
|
+
black_list.append(target.alias.parts[-1].lower())
|
|
192
|
+
|
|
193
|
+
view_where = view_query.where
|
|
194
|
+
for condition in conditions:
|
|
195
|
+
arg1, arg2 = condition.args
|
|
196
|
+
|
|
197
|
+
if isinstance(arg1, Identifier):
|
|
198
|
+
name = arg1.parts[-1].lower()
|
|
199
|
+
if name in black_list or not (has_star or name in white_list):
|
|
200
|
+
continue
|
|
201
|
+
if isinstance(arg2, Identifier):
|
|
202
|
+
name = arg2.parts[-1].lower()
|
|
203
|
+
if name in black_list or not (has_star or name in white_list):
|
|
204
|
+
continue
|
|
205
|
+
|
|
206
|
+
# condition can be moved into view
|
|
207
|
+
condition2 = BinaryOperation(condition.op, [arg1, arg2])
|
|
208
|
+
if view_where is None:
|
|
209
|
+
view_where = condition2
|
|
210
|
+
else:
|
|
211
|
+
view_where = BinaryOperation("AND", args=[view_where, condition2])
|
|
212
|
+
|
|
213
|
+
# disable outer condition
|
|
214
|
+
condition.op = "="
|
|
215
|
+
condition.args = [Constant(0), Constant(0)]
|
|
216
|
+
|
|
217
|
+
view_query.where = view_where
|
|
218
|
+
|
|
219
|
+
# combine outer query with view's query
|
|
220
|
+
view_query.parentheses = True
|
|
221
|
+
query.from_table = view_query
|
|
222
|
+
return query
|
|
223
|
+
|
|
224
|
+
def query_view(self, query: Select, session) -> pd.DataFrame:
    """Execute ``query`` against a stored view and return the result frame.

    If the view's stored AST is a ``Select``, the outer query is merged into
    it with ``combine_view_select`` and the merged statement is executed on
    its own. For any other kind of stored statement the view is executed as
    stored and ``query`` is then applied to the fetched frame via
    ``query_df``.

    Args:
        query: the outer SELECT that targets the view.
        session: session object forwarded to ``SQLQuery`` / ``query_df``.

    Returns:
        DataFrame with the rows produced for ``query``.
    """
    meta = self.get_view_meta(query)
    view_id = meta["id"]

    query_context_controller.set_context("view", view_id)
    try:
        statement = meta["query_ast"]
        merged = isinstance(statement, Select)
        if merged:
            statement = self.combine_view_select(statement, query)
        frame = SQLQuery(statement, session=session).fetched_data.to_df()
    finally:
        # the view context is released whether or not execution succeeded
        query_context_controller.release_context("view", view_id)

    # remove duplicated columns
    frame = frame.loc[:, ~frame.columns.duplicated()]

    if merged:
        # the outer query was already executed as part of the merged statement
        return frame
    return query_df(frame, query, session=session)
|
|
142
246
|
|
|
143
247
|
@staticmethod
|
|
144
248
|
def _get_model_data(predictor_record, integraion_record, with_secrets: bool = True):
|
|
@@ -1139,8 +1139,14 @@ class KnowledgeBaseController:
|
|
|
1139
1139
|
else:
|
|
1140
1140
|
vector_db_name, vector_table_name = storage.parts
|
|
1141
1141
|
|
|
1142
|
+
data_node = self.session.datahub.get(vector_db_name)
|
|
1143
|
+
if data_node:
|
|
1144
|
+
vector_store_handler = data_node.integration_handler
|
|
1145
|
+
else:
|
|
1146
|
+
raise ValueError(
|
|
1147
|
+
f"Unable to find database named {vector_db_name}, please make sure {vector_db_name} is defined"
|
|
1148
|
+
)
|
|
1142
1149
|
# create table in vectordb before creating KB
|
|
1143
|
-
vector_store_handler = self.session.datahub.get(vector_db_name).integration_handler
|
|
1144
1150
|
vector_store_handler.create_table(vector_table_name)
|
|
1145
1151
|
if keyword_search_enabled:
|
|
1146
1152
|
vector_store_handler.add_full_text_index(vector_table_name, TableField.CONTENT.value)
|
|
@@ -4,8 +4,7 @@ import asyncio
|
|
|
4
4
|
from typing import List, Dict, Optional, Any
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
|
-
from
|
|
8
|
-
from langchain_core.documents import Document as LangchainDocument
|
|
7
|
+
from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
|
|
9
8
|
|
|
10
9
|
from mindsdb.integrations.utilities.rag.splitters.file_splitter import (
|
|
11
10
|
FileSplitter,
|
|
@@ -22,7 +21,6 @@ from mindsdb.interfaces.knowledge_base.preprocessing.models import (
|
|
|
22
21
|
)
|
|
23
22
|
from mindsdb.utilities import log
|
|
24
23
|
|
|
25
|
-
|
|
26
24
|
logger = log.getLogger(__name__)
|
|
27
25
|
|
|
28
26
|
_DEFAULT_CONTENT_COLUMN_NAME = "content"
|
|
@@ -49,11 +47,10 @@ class DocumentPreprocessor:
|
|
|
49
47
|
if self.splitter is None:
|
|
50
48
|
raise ValueError("Splitter not configured")
|
|
51
49
|
|
|
52
|
-
|
|
53
|
-
langchain_doc = LangchainDocument(page_content=doc.content, metadata=doc.metadata or {})
|
|
50
|
+
metadata = doc.metadata or {}
|
|
54
51
|
# Split and convert back to our Document type
|
|
55
|
-
|
|
56
|
-
return [Document(content=
|
|
52
|
+
split_texts = self.splitter.split_text(doc.content)
|
|
53
|
+
return [Document(content=text, metadata=metadata) for text in split_texts]
|
|
57
54
|
|
|
58
55
|
def _get_source(self) -> str:
|
|
59
56
|
"""Get the source identifier for this preprocessor"""
|
|
@@ -266,16 +263,15 @@ Please give a short succinct context to situate this chunk within the overall do
|
|
|
266
263
|
|
|
267
264
|
|
|
268
265
|
class TextChunkingPreprocessor(DocumentPreprocessor):
|
|
269
|
-
"""Default text chunking preprocessor using
|
|
266
|
+
"""Default text chunking preprocessor using TextSplitter"""
|
|
270
267
|
|
|
271
268
|
def __init__(self, config: Optional[TextChunkingConfig] = None):
    """Create the preprocessor, using a default ``TextChunkingConfig`` when none is given."""
    super().__init__()
    if not config:
        config = TextChunkingConfig()
    self.config = config
    # the splitter is configured entirely from the chunking config
    splitter_options = {
        "chunk_size": config.chunk_size,
        "chunk_overlap": config.chunk_overlap,
        "separators": config.separators,
    }
    self.splitter = TextSplitter(**splitter_options)
|
|
281
277
|
|