MindsDB 25.7.4.0__py3-none-any.whl → 25.8.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB has been flagged as potentially problematic. (The link to further details was not preserved in this text copy.)

Files changed (65)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +13 -1
  3. mindsdb/api/a2a/agent.py +6 -16
  4. mindsdb/api/a2a/common/types.py +3 -4
  5. mindsdb/api/a2a/task_manager.py +24 -35
  6. mindsdb/api/a2a/utils.py +63 -0
  7. mindsdb/api/executor/command_executor.py +9 -15
  8. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
  9. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
  10. mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
  11. mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
  12. mindsdb/api/executor/utilities/sql.py +30 -0
  13. mindsdb/api/http/initialize.py +2 -1
  14. mindsdb/api/http/namespaces/agents.py +6 -7
  15. mindsdb/api/http/namespaces/views.py +56 -72
  16. mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
  17. mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
  18. mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
  19. mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
  20. mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
  21. mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
  22. mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
  23. mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
  24. mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
  25. mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
  26. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
  27. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
  28. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
  29. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  30. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -2
  31. mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
  32. mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
  33. mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
  34. mindsdb/integrations/handlers/salesforce_handler/constants.py +9 -2
  35. mindsdb/integrations/libs/llm/config.py +0 -14
  36. mindsdb/integrations/libs/llm/utils.py +0 -15
  37. mindsdb/integrations/utilities/files/file_reader.py +5 -19
  38. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
  39. mindsdb/interfaces/agents/agents_controller.py +83 -45
  40. mindsdb/interfaces/agents/constants.py +16 -3
  41. mindsdb/interfaces/agents/langchain_agent.py +84 -21
  42. mindsdb/interfaces/database/projects.py +111 -7
  43. mindsdb/interfaces/knowledge_base/controller.py +7 -1
  44. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
  45. mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
  46. mindsdb/interfaces/query_context/context_controller.py +14 -15
  47. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +7 -1
  48. mindsdb/interfaces/skills/skill_tool.py +7 -1
  49. mindsdb/interfaces/skills/sql_agent.py +6 -2
  50. mindsdb/utilities/config.py +2 -0
  51. mindsdb/utilities/fs.py +60 -17
  52. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/METADATA +277 -262
  53. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/RECORD +57 -56
  54. mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
  55. mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
  56. mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
  57. mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
  58. mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
  59. mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
  60. mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
  61. mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
  62. mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
  63. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/WHEEL +0 -0
  64. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/licenses/LICENSE +0 -0
  65. {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.3.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/agents/agents_controller.py +83 -45
@@ -145,11 +145,60 @@ class AgentsController:
145
145
 
146
146
  return all_agents.all()
147
147
 
148
+ def _create_default_sql_skill(
149
+ self,
150
+ name,
151
+ project_name,
152
+ include_tables: List[str] = None,
153
+ include_knowledge_bases: List[str] = None,
154
+ ):
155
+ # Create a default SQL skill
156
+ skill_name = f"{name}_sql_skill"
157
+ skill_params = {
158
+ "type": "sql",
159
+ "description": f"Auto-generated SQL skill for agent {name}",
160
+ }
161
+
162
+ # Add restrictions provided
163
+ if include_tables:
164
+ skill_params["include_tables"] = include_tables
165
+ if include_knowledge_bases:
166
+ skill_params["include_knowledge_bases"] = include_knowledge_bases
167
+
168
+ try:
169
+ # Check if skill already exists
170
+ existing_skill = self.skills_controller.get_skill(skill_name, project_name)
171
+ if existing_skill is None:
172
+ # Create the skill
173
+ skill_type = skill_params.pop("type")
174
+ self.skills_controller.add_skill(
175
+ name=skill_name, project_name=project_name, type=skill_type, params=skill_params
176
+ )
177
+ else:
178
+ # Update the skill if parameters have changed
179
+ params_changed = False
180
+
181
+ # Check if skill parameters need to be updated
182
+ for param_key, param_value in skill_params.items():
183
+ if existing_skill.params.get(param_key) != param_value:
184
+ existing_skill.params[param_key] = param_value
185
+ params_changed = True
186
+
187
+ # Update the skill if needed
188
+ if params_changed:
189
+ flag_modified(existing_skill, "params")
190
+ db.session.commit()
191
+
192
+ except Exception as e:
193
+ raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
194
+
195
+ return skill_name
196
+
148
197
  def add_agent(
149
198
  self,
150
199
  name: str,
151
200
  project_name: str = None,
152
- model_name: str = None,
201
+ model_name: Union[str, dict] = None,
153
202
  skills: List[Union[str, dict]] = None,
154
203
  provider: str = None,
155
204
  params: Dict[str, Any] = None,
@@ -256,46 +305,13 @@ class AgentsController:
256
305
 
257
306
  # Auto-create SQL skill if no skills are provided but include_tables or include_knowledge_bases params are provided
258
307
  if not skills and (include_tables or include_knowledge_bases):
259
- # Create a default SQL skill
260
- skill_name = f"{name}_sql_skill"
261
- skill_params = {
262
- "type": "sql",
263
- "description": f"Auto-generated SQL skill for agent {name}",
264
- }
265
-
266
- # Add restrictions provided
267
- if include_tables:
268
- skill_params["include_tables"] = include_tables
269
- if include_knowledge_bases:
270
- skill_params["include_knowledge_bases"] = include_knowledge_bases
271
-
272
- try:
273
- # Check if skill already exists
274
- existing_skill = self.skills_controller.get_skill(skill_name, project_name)
275
- if existing_skill is None:
276
- # Create the skill
277
- skill_type = skill_params.pop("type")
278
- self.skills_controller.add_skill(
279
- name=skill_name, project_name=project_name, type=skill_type, params=skill_params
280
- )
281
- else:
282
- # Update the skill if parameters have changed
283
- params_changed = False
284
-
285
- # Check if skill parameters need to be updated
286
- for param_key, param_value in skill_params.items():
287
- if existing_skill.params.get(param_key) != param_value:
288
- existing_skill.params[param_key] = param_value
289
- params_changed = True
290
-
291
- # Update the skill if needed
292
- if params_changed:
293
- flag_modified(existing_skill, "params")
294
- db.session.commit()
295
-
296
- skills = [skill_name]
297
- except Exception as e:
298
- raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
308
+ skill = self._create_default_sql_skill(
309
+ name,
310
+ project_name,
311
+ include_tables=include_tables,
312
+ include_knowledge_bases=include_knowledge_bases,
313
+ )
314
+ skills = [skill]
299
315
 
300
316
  agent = db.Agents(
301
317
  name=name,
@@ -351,7 +367,7 @@ class AgentsController:
351
367
  agent_name: str,
352
368
  project_name: str = default_project,
353
369
  name: str = None,
354
- model_name: str = None,
370
+ model_name: Union[str, dict] = None,
355
371
  skills_to_add: List[Union[str, dict]] = None,
356
372
  skills_to_remove: List[str] = None,
357
373
  skills_to_rewrite: List[Union[str, dict]] = None,
@@ -365,7 +381,7 @@ class AgentsController:
365
381
  agent_name (str): The name of the new agent, or existing agent to update
366
382
  project_name (str): The containing project
367
383
  name (str): The updated name of the agent
368
- model_name (str): The name of the existing ML model the agent will use
384
+ model_name (str | dict): The name of the existing ML model the agent will use
369
385
  skills_to_add (List[Union[str, dict]]): List of skill names to add to the agent, or list of dicts
370
386
  with one of keys is "name", and other is additional parameters for relationship agent<>skill
371
387
  skills_to_remove (List[str]): List of skill names to remove from the agent
@@ -394,6 +410,8 @@ class AgentsController:
394
410
  existing_agent = self.get_agent(agent_name, project_name=project_name)
395
411
  if existing_agent is None:
396
412
  raise EntityNotExistsError(f"Agent with name not found: {agent_name}")
413
+ existing_params = existing_agent.params or {}
414
+
397
415
  is_demo = (existing_agent.params or {}).get("is_demo", False)
398
416
  if is_demo and (
399
417
  (name is not None and name != agent_name)
@@ -413,12 +431,34 @@ class AgentsController:
413
431
  existing_agent.name = name
414
432
 
415
433
  if model_name or provider:
434
+ if isinstance(model_name, dict):
435
+ # move into params
436
+ existing_params["model"] = model_name
437
+ model_name = None
438
+
416
439
  # check model and provider
417
440
  model, provider = self.check_model_provider(model_name, provider)
418
441
  # Update model and provider
419
442
  existing_agent.model_name = model_name
420
443
  existing_agent.provider = provider
421
444
 
445
+ if "data" in params:
446
+ if len(skills_to_add) > 0 or len(skills_to_remove) > 0:
447
+ raise ValueError(
448
+ "'data' parameter cannot be used with 'skills_to_remove' or 'skills_to_add' parameters"
449
+ )
450
+
451
+ include_knowledge_bases = params["data"].get("knowledge_bases")
452
+ include_tables = params["data"].get("tables")
453
+
454
+ skill = self._create_default_sql_skill(
455
+ agent_name,
456
+ project_name,
457
+ include_tables=include_tables,
458
+ include_knowledge_bases=include_knowledge_bases,
459
+ )
460
+ skills_to_rewrite = [{"name": skill}]
461
+
422
462
  # check that all skills exist
423
463
  skill_name_to_record_map = {}
424
464
  for skill_meta in skills_to_add + skills_to_remove + skills_to_rewrite:
@@ -496,8 +536,6 @@ class AgentsController:
496
536
  db.session.add(association)
497
537
 
498
538
  if params is not None:
499
- existing_params = existing_agent.params or {}
500
-
501
539
  if params.get("data", {}).get("tables"):
502
540
  new_table_entries = set(params["data"]["tables"]) - set(
503
541
  existing_params.get("data", {}).get("tables", [])
mindsdb/interfaces/agents/constants.py +16 -3
@@ -26,7 +26,6 @@ OPEN_AI_CHAT_MODELS = (
26
26
  SUPPORTED_PROVIDERS = {
27
27
  "openai",
28
28
  "anthropic",
29
- "anyscale",
30
29
  "litellm",
31
30
  "ollama",
32
31
  "nvidia_nim",
@@ -213,7 +212,14 @@ DEFAULT_TIKTOKEN_MODEL_NAME = os.getenv("DEFAULT_TIKTOKEN_MODEL_NAME", "gpt-4")
213
212
  AGENT_CHUNK_POLLING_INTERVAL_SECONDS = os.getenv("AGENT_CHUNK_POLLING_INTERVAL_SECONDS", 1.0)
214
213
  DEFAULT_TEXT2SQL_DATABASE = "mindsdb"
215
214
  DEFAULT_AGENT_SYSTEM_PROMPT = """
216
- You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
215
+ You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
216
+
217
+ **CONVERSATION CONTEXT:**
218
+ - You have access to previous messages in this conversation through your memory system
219
+ - When users ask about previous questions, topics, or context, refer to the conversation history
220
+ - Maintain conversational continuity and reference earlier parts of the conversation when relevant
221
+ - When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
222
+ - You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
217
223
 
218
224
  1. For factual questions about specific topics, use the knowledge base tools in this sequence:
219
225
  - First use kb_list_tool to see available knowledge bases
@@ -231,7 +237,14 @@ For factual questions, ALWAYS use the available tools to look up information rat
231
237
 
232
238
  """
233
239
 
234
- MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
240
+ MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
241
+
242
+ **CONVERSATION CONTEXT:**
243
+ - You have access to previous messages in this conversation through your memory system
244
+ - When users ask about previous questions, topics, or context, refer to the conversation history
245
+ - Maintain conversational continuity and reference earlier parts of the conversation when relevant
246
+ - When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
247
+ - You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
235
248
 
236
249
  1. For questions about database tables and their contents:
237
250
  - Use the sql_db_query to query the tables directly
mindsdb/interfaces/agents/langchain_agent.py +84 -21
@@ -7,15 +7,17 @@ import re
7
7
  import threading
8
8
  import numpy as np
9
9
  import pandas as pd
10
+ import logging
10
11
 
11
12
  from langchain.agents import AgentExecutor
12
13
  from langchain.agents.initialize import initialize_agent
13
14
  from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
14
- from langchain_community.chat_models import ChatAnyscale, ChatLiteLLM, ChatOllama
15
+ from langchain_community.chat_models import ChatLiteLLM, ChatOllama
15
16
  from langchain_writer import ChatWriter
16
17
  from langchain_google_genai import ChatGoogleGenerativeAI
17
18
  from langchain_core.agents import AgentAction, AgentStep
18
19
  from langchain_core.callbacks.base import BaseCallbackHandler
20
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
19
21
 
20
22
  from langchain_nvidia_ai_endpoints import ChatNVIDIA
21
23
  from langchain_core.messages.base import BaseMessage
@@ -63,7 +65,6 @@ from mindsdb.interfaces.agents.constants import (
63
65
  )
64
66
  from mindsdb.interfaces.skills.skill_tool import skill_tool, SkillData
65
67
  from langchain_anthropic import ChatAnthropic
66
- from langchain_core.messages import SystemMessage
67
68
  from langchain_openai import ChatOpenAI
68
69
 
69
70
  from mindsdb.utilities.langfuse import LangfuseClientWrapper
@@ -165,8 +166,6 @@ def create_chat_model(args: Dict):
165
166
  except NotImplementedError:
166
167
  chat_open_ai.tiktoken_model_name = DEFAULT_TIKTOKEN_MODEL_NAME
167
168
  return chat_open_ai
168
- if args["provider"] == "anyscale":
169
- return ChatAnyscale(**model_kwargs)
170
169
  if args["provider"] == "litellm":
171
170
  return ChatLiteLLM(**model_kwargs)
172
171
  if args["provider"] == "ollama":
@@ -299,6 +298,11 @@ class LangchainAgent:
299
298
  if "prompt_template" in args:
300
299
  logger.info(f"Using prompt template: {args['prompt_template'][:50]}...")
301
300
 
301
+ if "model_name" not in args:
302
+ raise ValueError(
303
+ "No model name provided for agent. Provide it in the model parameter or in the default model setup."
304
+ )
305
+
302
306
  return args
303
307
 
304
308
  def get_metadata(self) -> Dict:
@@ -347,15 +351,20 @@ class LangchainAgent:
347
351
  args.update(params or {})
348
352
 
349
353
  df = pd.DataFrame(messages)
354
+ logger.info(f"LangchainAgent.get_completion: Received {len(messages)} messages")
355
+ if logger.isEnabledFor(logging.DEBUG):
356
+ logger.debug(f"Messages DataFrame shape: {df.shape}")
357
+ logger.debug(f"Messages DataFrame columns: {df.columns.tolist()}")
358
+ logger.debug(f"Messages DataFrame content: {df.to_dict('records')}")
350
359
 
351
360
  # Back compatibility for old models
352
361
  self.provider = args.get("provider", get_llm_provider(args))
353
362
 
354
363
  df = df.reset_index(drop=True)
355
364
  agent = self.create_agent(df)
356
- # Use last message as prompt, remove other questions.
357
- user_column = args.get("user_column", USER_COLUMN)
358
- df.iloc[:-1, df.columns.get_loc(user_column)] = None
365
+ # Keep conversation history for context - don't nullify previous messages
366
+
367
+ # Only use the last message as the current prompt, but preserve history for agent memory
359
368
  response = self.run_agent(df, agent, args)
360
369
 
361
370
  # End the run completion span and update the metadata with tool usage
@@ -376,6 +385,12 @@ class LangchainAgent:
376
385
  args = self.args
377
386
 
378
387
  df = pd.DataFrame(messages)
388
+ logger.info(f"LangchainAgent._get_completion_stream: Received {len(messages)} messages")
389
+ # Check if we have the expected columns for conversation history
390
+ if "question" in df.columns and "answer" in df.columns:
391
+ logger.debug("DataFrame has question/answer columns for conversation history")
392
+ else:
393
+ logger.warning("DataFrame missing question/answer columns! Available columns: {df.columns.tolist()}")
379
394
 
380
395
  self.embedding_model_provider = args.get("embedding_model_provider", get_embedding_model_provider(args))
381
396
  # Back compatibility for old models
@@ -383,9 +398,8 @@ class LangchainAgent:
383
398
 
384
399
  df = df.reset_index(drop=True)
385
400
  agent = self.create_agent(df)
386
- # Use last message as prompt, remove other questions.
387
- user_column = args.get("user_column", USER_COLUMN)
388
- df.iloc[:-1, df.columns.get_loc(user_column)] = None
401
+ # Keep conversation history for context - don't nullify previous messages
402
+ # Only use the last message as the current prompt, but preserve history for agent memory
389
403
  return self.stream_agent(df, agent, args)
390
404
 
391
405
  def create_agent(self, df: pd.DataFrame) -> AgentExecutor:
@@ -405,7 +419,8 @@ class LangchainAgent:
405
419
  # Prefer prediction prompt template over original if provided.
406
420
  prompt_template = args["prompt_template"]
407
421
 
408
- # Set up memory.
422
+ # Modern LangChain approach: Use memory but populate it correctly
423
+ # Create memory and populate with conversation history
409
424
  memory = ConversationSummaryBufferMemory(
410
425
  llm=llm,
411
426
  input_key="input",
@@ -414,17 +429,41 @@ class LangchainAgent:
414
429
  memory_key="chat_history",
415
430
  )
416
431
 
432
+ # Add system message first
417
433
  memory.chat_memory.messages.insert(0, SystemMessage(content=prompt_template))
418
- # User - Assistant conversation. All except the last message.
434
+
419
435
  user_column = args.get("user_column", USER_COLUMN)
420
436
  assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
421
- for row in df[:-1].to_dict("records"):
422
- question = row[user_column]
423
- answer = row[assistant_column]
437
+
438
+ logger.info(f"Processing conversation history: {len(df)} total messages, {len(df[:-1])} history messages")
439
+ logger.debug(f"User column: {user_column}, Assistant column: {assistant_column}")
440
+
441
+ # Process history messages (all except the last one which is current message)
442
+ history_df = df[:-1]
443
+ if len(history_df) == 0:
444
+ logger.debug("No history rows to process - this is normal for first message")
445
+
446
+ history_count = 0
447
+ for i, row in enumerate(history_df.to_dict("records")):
448
+ question = row.get(user_column)
449
+ answer = row.get(assistant_column)
450
+ logger.debug(f"Converting history row {i}: question='{question}', answer='{answer}'")
451
+
452
+ # Add messages directly to memory's chat_memory.messages list (modern approach)
424
453
  if isinstance(question, str) and len(question) > 0:
425
- memory.chat_memory.add_user_message(question)
454
+ memory.chat_memory.messages.append(HumanMessage(content=question))
455
+ history_count += 1
456
+ logger.debug(f"Added HumanMessage to memory: {question}")
426
457
  if isinstance(answer, str) and len(answer) > 0:
427
- memory.chat_memory.add_ai_message(answer)
458
+ memory.chat_memory.messages.append(AIMessage(content=answer))
459
+ history_count += 1
460
+ logger.debug(f"Added AIMessage to memory: {answer}")
461
+
462
+ logger.info(f"Built conversation history with {history_count} history messages + system message")
463
+ logger.debug(f"Final memory messages count: {len(memory.chat_memory.messages)}")
464
+
465
+ # Store memory for agent use
466
+ self._conversation_memory = memory
428
467
 
429
468
  agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
430
469
  agent_executor = initialize_agent(
@@ -564,7 +603,22 @@ AI: {response}"""
564
603
  return {CONTEXT_COLUMN: [], ASSISTANT_COLUMN: ""}
565
604
  try:
566
605
  callbacks, context_callback = prepare_callbacks(self, args)
567
- result = agent_executor.invoke(prompt, config={"callbacks": callbacks})
606
+
607
+ # Modern LangChain approach: Include conversation history + current message
608
+ if hasattr(self, "_conversation_messages") and self._conversation_messages:
609
+ # Add current user message to conversation history
610
+ full_messages = self._conversation_messages + [HumanMessage(content=prompt)]
611
+ logger.critical(f"🔍 INVOKING AGENT with {len(full_messages)} messages (including history)")
612
+ logger.debug(
613
+ f"Full conversation messages: {[type(msg).__name__ + ': ' + msg.content[:100] + '...' for msg in full_messages]}"
614
+ )
615
+
616
+ # For agents, we need to pass the input in the expected format
617
+ # The agent expects 'input' key with the current question, but conversation history should be in memory
618
+ result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
619
+ else:
620
+ logger.warning("No conversation messages found - using simple prompt")
621
+ result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
568
622
  captured_context = context_callback.get_contexts()
569
623
  output = result["output"] if isinstance(result, dict) and "output" in result else str(result)
570
624
  return {CONTEXT_COLUMN: captured_context, ASSISTANT_COLUMN: output}
@@ -587,7 +641,14 @@ AI: {response}"""
587
641
  agent_timeout_seconds = args.get("timeout", DEFAULT_AGENT_TIMEOUT_SECONDS)
588
642
 
589
643
  with ContextThreadPoolExecutor(max_workers=max_workers) as executor:
590
- futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, prompt) for prompt in prompts]
644
+ # Only process the last prompt (current question), not all prompts
645
+ # The previous prompts are conversation history and should only be used for context
646
+ if prompts:
647
+ current_prompt = prompts[-1] # Last prompt is the current question
648
+ futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, current_prompt)]
649
+ else:
650
+ logger.error("No prompts found to process")
651
+ futures = []
591
652
  try:
592
653
  for future in as_completed(futures, timeout=agent_timeout_seconds):
593
654
  result = future.result()
@@ -688,12 +749,14 @@ AI: {response}"""
688
749
 
689
750
  callbacks, context_callback = prepare_callbacks(self, args)
690
751
 
691
- yield self.add_chunk_metadata({"type": "start", "prompt": prompts[0]})
752
+ # Use last prompt (current question) instead of first prompt (history)
753
+ current_prompt = prompts[-1] if prompts else ""
754
+ yield self.add_chunk_metadata({"type": "start", "prompt": current_prompt})
692
755
 
693
756
  if not hasattr(agent_executor, "stream") or not callable(agent_executor.stream):
694
757
  raise AttributeError("The agent_executor does not have a 'stream' method")
695
758
 
696
- stream_iterator = self._stream_agent_executor(agent_executor, prompts[0], callbacks)
759
+ stream_iterator = self._stream_agent_executor(agent_executor, current_prompt, callbacks)
697
760
  for chunk in stream_iterator:
698
761
  yield chunk
699
762
 
mindsdb/interfaces/database/projects.py +111 -7
@@ -3,11 +3,12 @@ from copy import deepcopy
3
3
  from typing import List, Optional
4
4
  from collections import OrderedDict
5
5
 
6
+ import pandas as pd
6
7
  import sqlalchemy as sa
7
8
  import numpy as np
8
9
 
9
10
  from mindsdb_sql_parser.ast.base import ASTNode
10
- from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier
11
+ from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier, BinaryOperation
11
12
  from mindsdb_sql_parser import parse_sql
12
13
 
13
14
  from mindsdb.interfaces.storage import db
@@ -109,7 +110,19 @@ class Project:
109
110
  """
110
111
  ViewController().delete(name, project_name=self.name, strict_case=strict_case)
111
112
 
112
- def create_view(self, name: str, query: str):
113
+ def create_view(self, name: str, query: str, session):
114
+ ast_query = parse_sql(query)
115
+
116
+ if isinstance(ast_query, Select):
117
+ # check create view sql
118
+ ast_query.limit = Constant(1)
119
+
120
+ query_context_controller.set_context(query_context_controller.IGNORE_CONTEXT)
121
+ try:
122
+ SQLQuery(ast_query, session=session, database=self.name)
123
+ finally:
124
+ query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
125
+
113
126
  ViewController().add(name, query=query, project_name=self.name)
114
127
 
115
128
  def update_view(self, name: str, query: str, strict_case: bool = False):
@@ -124,21 +137,112 @@ class Project:
124
137
  view_meta["query_ast"] = parse_sql(view_meta["query"])
125
138
  return view_meta
126
139
 
127
- def query_view(self, query, session):
140
+ @staticmethod
141
+ def combine_view_select(view_query: Select, query: Select) -> Select:
142
+ """
143
+ Create a combined query from view's query and outer query.
144
+ """
145
+
146
+ # apply optimizations
147
+ if query.where is not None:
148
+ # Get conditions that can be duplicated into view's query
149
+ # It has to be simple condition with identifier and constant
150
+ # Also it shouldn't be under the OR condition
151
+
152
+ def get_conditions_to_move(node):
153
+ if not isinstance(node, BinaryOperation):
154
+ return []
155
+ op = node.op.upper()
156
+ if op == "AND":
157
+ conditions = []
158
+ conditions.extend(get_conditions_to_move(node.args[0]))
159
+ conditions.extend(get_conditions_to_move(node.args[1]))
160
+ return conditions
161
+
162
+ if op == "OR":
163
+ return []
164
+ if isinstance(node.args[0], (Identifier, Constant)) and isinstance(
165
+ node.args[1], (Identifier, Constant)
166
+ ):
167
+ return [node]
168
+
169
+ conditions = get_conditions_to_move(query.where)
170
+
171
+ if conditions:
172
+ # analyse targets
173
+ # if target element has alias
174
+ # if element is not identifier or the name is not equal to alias:
175
+ # add alias to black list
176
+ # white list:
177
+ # all targets that are identifiers with no alias or equal to its alias
178
+ # condition can be moved if
179
+ # column is not in black list AND (query has star(*) OR column in white list)
180
+
181
+ has_star = False
182
+ white_list, black_list = [], []
183
+ for target in view_query.targets:
184
+ if isinstance(target, Star):
185
+ has_star = True
186
+ if isinstance(target, Identifier):
187
+ name = target.parts[-1].lower()
188
+ if target.alias is None or target.alias.parts[-1].lower() == name:
189
+ white_list.append(name)
190
+ elif target.alias is not None:
191
+ black_list.append(target.alias.parts[-1].lower())
192
+
193
+ view_where = view_query.where
194
+ for condition in conditions:
195
+ arg1, arg2 = condition.args
196
+
197
+ if isinstance(arg1, Identifier):
198
+ name = arg1.parts[-1].lower()
199
+ if name in black_list or not (has_star or name in white_list):
200
+ continue
201
+ if isinstance(arg2, Identifier):
202
+ name = arg2.parts[-1].lower()
203
+ if name in black_list or not (has_star or name in white_list):
204
+ continue
205
+
206
+ # condition can be moved into view
207
+ condition2 = BinaryOperation(condition.op, [arg1, arg2])
208
+ if view_where is None:
209
+ view_where = condition2
210
+ else:
211
+ view_where = BinaryOperation("AND", args=[view_where, condition2])
212
+
213
+ # disable outer condition
214
+ condition.op = "="
215
+ condition.args = [Constant(0), Constant(0)]
216
+
217
+ view_query.where = view_where
218
+
219
+ # combine outer query with view's query
220
+ view_query.parentheses = True
221
+ query.from_table = view_query
222
+ return query
223
+
224
+ def query_view(self, query: Select, session) -> pd.DataFrame:
128
225
  view_meta = self.get_view_meta(query)
129
226
 
130
227
  query_context_controller.set_context("view", view_meta["id"])
131
-
228
+ query_applied = False
132
229
  try:
133
- sqlquery = SQLQuery(view_meta["query_ast"], session=session)
230
+ view_query = view_meta["query_ast"]
231
+ if isinstance(view_query, Select):
232
+ view_query = self.combine_view_select(view_query, query)
233
+ query_applied = True
234
+
235
+ sqlquery = SQLQuery(view_query, session=session)
134
236
  df = sqlquery.fetched_data.to_df()
135
237
  finally:
136
238
  query_context_controller.release_context("view", view_meta["id"])
137
239
 
138
240
  # remove duplicated columns
139
241
  df = df.loc[:, ~df.columns.duplicated()]
140
-
141
- return query_df(df, query, session=session)
242
+ if query_applied:
243
+ return df
244
+ else:
245
+ return query_df(df, query, session=session)
142
246
 
143
247
  @staticmethod
144
248
  def _get_model_data(predictor_record, integraion_record, with_secrets: bool = True):
mindsdb/interfaces/knowledge_base/controller.py +7 -1
@@ -1139,8 +1139,14 @@ class KnowledgeBaseController:
1139
1139
  else:
1140
1140
  vector_db_name, vector_table_name = storage.parts
1141
1141
 
1142
+ data_node = self.session.datahub.get(vector_db_name)
1143
+ if data_node:
1144
+ vector_store_handler = data_node.integration_handler
1145
+ else:
1146
+ raise ValueError(
1147
+ f"Unable to find database named {vector_db_name}, please make sure {vector_db_name} is defined"
1148
+ )
1142
1149
  # create table in vectordb before creating KB
1143
- vector_store_handler = self.session.datahub.get(vector_db_name).integration_handler
1144
1150
  vector_store_handler.create_table(vector_table_name)
1145
1151
  if keyword_search_enabled:
1146
1152
  vector_store_handler.add_full_text_index(vector_table_name, TableField.CONTENT.value)
mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
@@ -4,8 +4,7 @@ import asyncio
4
4
  from typing import List, Dict, Optional, Any
5
5
 
6
6
  import pandas as pd
7
- from langchain_text_splitters import RecursiveCharacterTextSplitter
8
- from langchain_core.documents import Document as LangchainDocument
7
+ from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
9
8
 
10
9
  from mindsdb.integrations.utilities.rag.splitters.file_splitter import (
11
10
  FileSplitter,
@@ -22,7 +21,6 @@ from mindsdb.interfaces.knowledge_base.preprocessing.models import (
22
21
  )
23
22
  from mindsdb.utilities import log
24
23
 
25
-
26
24
  logger = log.getLogger(__name__)
27
25
 
28
26
  _DEFAULT_CONTENT_COLUMN_NAME = "content"
@@ -49,11 +47,10 @@ class DocumentPreprocessor:
49
47
  if self.splitter is None:
50
48
  raise ValueError("Splitter not configured")
51
49
 
52
- # Convert to langchain Document for splitting
53
- langchain_doc = LangchainDocument(page_content=doc.content, metadata=doc.metadata or {})
50
+ metadata = doc.metadata or {}
54
51
  # Split and convert back to our Document type
55
- split_docs = self.splitter.split_documents([langchain_doc])
56
- return [Document(content=split_doc.page_content, metadata=split_doc.metadata) for split_doc in split_docs]
52
+ split_texts = self.splitter.split_text(doc.content)
53
+ return [Document(content=text, metadata=metadata) for text in split_texts]
57
54
 
58
55
  def _get_source(self) -> str:
59
56
  """Get the source identifier for this preprocessor"""
@@ -266,16 +263,15 @@ Please give a short succinct context to situate this chunk within the overall do
266
263
 
267
264
 
268
265
  class TextChunkingPreprocessor(DocumentPreprocessor):
269
- """Default text chunking preprocessor using RecursiveCharacterTextSplitter"""
266
+ """Default text chunking preprocessor using TextSplitter"""
270
267
 
271
268
  def __init__(self, config: Optional[TextChunkingConfig] = None):
272
269
  """Initialize with text chunking configuration"""
273
270
  super().__init__()
274
271
  self.config = config or TextChunkingConfig()
275
- self.splitter = RecursiveCharacterTextSplitter(
272
+ self.splitter = TextSplitter(
276
273
  chunk_size=self.config.chunk_size,
277
274
  chunk_overlap=self.config.chunk_overlap,
278
- length_function=self.config.length_function,
279
275
  separators=self.config.separators,
280
276
  )
281
277