MindsDB 25.8.2.0__py3-none-any.whl → 25.9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -45
- mindsdb/api/a2a/__init__.py +52 -0
- mindsdb/api/a2a/agent.py +17 -28
- mindsdb/api/a2a/common/server/server.py +17 -36
- mindsdb/api/a2a/common/server/task_manager.py +14 -28
- mindsdb/api/a2a/common/types.py +3 -4
- mindsdb/api/a2a/task_manager.py +43 -55
- mindsdb/api/a2a/utils.py +63 -0
- mindsdb/api/common/middleware.py +106 -0
- mindsdb/api/http/initialize.py +13 -15
- mindsdb/api/http/namespaces/agents.py +6 -7
- mindsdb/api/http/namespaces/auth.py +6 -14
- mindsdb/api/http/namespaces/config.py +0 -2
- mindsdb/api/http/namespaces/default.py +74 -106
- mindsdb/api/http/start.py +25 -44
- mindsdb/api/litellm/start.py +11 -10
- mindsdb/api/mcp/__init__.py +165 -0
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +33 -64
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +86 -85
- mindsdb/integrations/handlers/crate_handler/crate_handler.py +3 -7
- mindsdb/integrations/handlers/derby_handler/derby_handler.py +32 -34
- mindsdb/integrations/handlers/documentdb_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py +12 -13
- mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +45 -44
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +101 -95
- mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +129 -129
- mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +59 -43
- mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +38 -39
- mindsdb/integrations/handlers/informix_handler/informix_handler.py +5 -18
- mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +22 -28
- mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +3 -7
- mindsdb/integrations/handlers/mongodb_handler/mongodb_handler.py +53 -67
- mindsdb/integrations/handlers/mongodb_handler/requirements.txt +1 -0
- mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_ast.py +43 -68
- mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_parser.py +17 -25
- mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_query.py +10 -16
- mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_render.py +43 -69
- mindsdb/integrations/libs/base.py +1 -1
- mindsdb/interfaces/agents/constants.py +17 -2
- mindsdb/interfaces/agents/langchain_agent.py +83 -18
- mindsdb/interfaces/knowledge_base/controller.py +3 -1
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +7 -1
- mindsdb/interfaces/skills/skill_tool.py +7 -1
- mindsdb/interfaces/skills/sql_agent.py +6 -2
- mindsdb/utilities/config.py +3 -155
- mindsdb/utilities/fs.py +10 -4
- mindsdb/utilities/log.py +0 -25
- mindsdb/utilities/starters.py +0 -39
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.9.1.0.dist-info}/METADATA +265 -263
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.9.1.0.dist-info}/RECORD +54 -98
- mindsdb/api/a2a/__main__.py +0 -144
- mindsdb/api/a2a/run_a2a.py +0 -86
- mindsdb/api/common/check_auth.py +0 -42
- mindsdb/api/http/gunicorn_wrapper.py +0 -17
- mindsdb/api/mcp/start.py +0 -205
- mindsdb/api/mongo/__init__.py +0 -0
- mindsdb/api/mongo/classes/__init__.py +0 -5
- mindsdb/api/mongo/classes/query_sql.py +0 -19
- mindsdb/api/mongo/classes/responder.py +0 -45
- mindsdb/api/mongo/classes/responder_collection.py +0 -34
- mindsdb/api/mongo/classes/scram.py +0 -86
- mindsdb/api/mongo/classes/session.py +0 -23
- mindsdb/api/mongo/functions/__init__.py +0 -19
- mindsdb/api/mongo/responders/__init__.py +0 -73
- mindsdb/api/mongo/responders/add_shard.py +0 -13
- mindsdb/api/mongo/responders/aggregate.py +0 -90
- mindsdb/api/mongo/responders/buildinfo.py +0 -17
- mindsdb/api/mongo/responders/coll_stats.py +0 -63
- mindsdb/api/mongo/responders/company_id.py +0 -25
- mindsdb/api/mongo/responders/connection_status.py +0 -22
- mindsdb/api/mongo/responders/count.py +0 -21
- mindsdb/api/mongo/responders/db_stats.py +0 -32
- mindsdb/api/mongo/responders/delete.py +0 -105
- mindsdb/api/mongo/responders/describe.py +0 -23
- mindsdb/api/mongo/responders/end_sessions.py +0 -13
- mindsdb/api/mongo/responders/find.py +0 -175
- mindsdb/api/mongo/responders/get_cmd_line_opts.py +0 -18
- mindsdb/api/mongo/responders/get_free_monitoring_status.py +0 -14
- mindsdb/api/mongo/responders/get_parameter.py +0 -23
- mindsdb/api/mongo/responders/getlog.py +0 -14
- mindsdb/api/mongo/responders/host_info.py +0 -28
- mindsdb/api/mongo/responders/insert.py +0 -270
- mindsdb/api/mongo/responders/is_master.py +0 -20
- mindsdb/api/mongo/responders/is_master_lower.py +0 -13
- mindsdb/api/mongo/responders/list_collections.py +0 -55
- mindsdb/api/mongo/responders/list_databases.py +0 -37
- mindsdb/api/mongo/responders/list_indexes.py +0 -22
- mindsdb/api/mongo/responders/ping.py +0 -13
- mindsdb/api/mongo/responders/recv_chunk_start.py +0 -13
- mindsdb/api/mongo/responders/replsetgetstatus.py +0 -13
- mindsdb/api/mongo/responders/sasl_continue.py +0 -34
- mindsdb/api/mongo/responders/sasl_start.py +0 -33
- mindsdb/api/mongo/responders/update_range_deletions.py +0 -12
- mindsdb/api/mongo/responders/whatsmyuri.py +0 -18
- mindsdb/api/mongo/server.py +0 -388
- mindsdb/api/mongo/start.py +0 -15
- mindsdb/api/mongo/utilities/__init__.py +0 -0
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.9.1.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.9.1.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.8.2.0.dist-info → mindsdb-25.9.1.0.dist-info}/top_level.txt +0 -0
|
@@ -208,11 +208,19 @@ DEFAULT_TEMPERATURE = 0.0
|
|
|
208
208
|
USER_COLUMN = "question"
|
|
209
209
|
DEFAULT_EMBEDDINGS_MODEL_PROVIDER = "openai"
|
|
210
210
|
DEFAULT_EMBEDDINGS_MODEL_CLASS = OpenAIEmbeddings
|
|
211
|
+
MAX_INSERT_BATCH_SIZE = 50_000
|
|
211
212
|
DEFAULT_TIKTOKEN_MODEL_NAME = os.getenv("DEFAULT_TIKTOKEN_MODEL_NAME", "gpt-4")
|
|
212
213
|
AGENT_CHUNK_POLLING_INTERVAL_SECONDS = os.getenv("AGENT_CHUNK_POLLING_INTERVAL_SECONDS", 1.0)
|
|
213
214
|
DEFAULT_TEXT2SQL_DATABASE = "mindsdb"
|
|
214
215
|
DEFAULT_AGENT_SYSTEM_PROMPT = """
|
|
215
|
-
You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
|
|
216
|
+
You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
|
|
217
|
+
|
|
218
|
+
**CONVERSATION CONTEXT:**
|
|
219
|
+
- You have access to previous messages in this conversation through your memory system
|
|
220
|
+
- When users ask about previous questions, topics, or context, refer to the conversation history
|
|
221
|
+
- Maintain conversational continuity and reference earlier parts of the conversation when relevant
|
|
222
|
+
- When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
|
|
223
|
+
- You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
|
|
216
224
|
|
|
217
225
|
1. For factual questions about specific topics, use the knowledge base tools in this sequence:
|
|
218
226
|
- First use kb_list_tool to see available knowledge bases
|
|
@@ -230,7 +238,14 @@ For factual questions, ALWAYS use the available tools to look up information rat
|
|
|
230
238
|
|
|
231
239
|
"""
|
|
232
240
|
|
|
233
|
-
MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
|
|
241
|
+
MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. You have access to conversation history and should use it to provide contextual responses. When answering questions, follow these guidelines:
|
|
242
|
+
|
|
243
|
+
**CONVERSATION CONTEXT:**
|
|
244
|
+
- You have access to previous messages in this conversation through your memory system
|
|
245
|
+
- When users ask about previous questions, topics, or context, refer to the conversation history
|
|
246
|
+
- Maintain conversational continuity and reference earlier parts of the conversation when relevant
|
|
247
|
+
- When asked to retrieve or list past user questions, examine your conversation memory to identify and list previous user queries
|
|
248
|
+
- You can reference specific past questions by their content or by their position in the conversation (e.g., "your first question", "the question you asked earlier about...")
|
|
234
249
|
|
|
235
250
|
1. For questions about database tables and their contents:
|
|
236
251
|
- Use the sql_db_query to query the tables directly
|
|
@@ -7,6 +7,7 @@ import re
|
|
|
7
7
|
import threading
|
|
8
8
|
import numpy as np
|
|
9
9
|
import pandas as pd
|
|
10
|
+
import logging
|
|
10
11
|
|
|
11
12
|
from langchain.agents import AgentExecutor
|
|
12
13
|
from langchain.agents.initialize import initialize_agent
|
|
@@ -16,6 +17,7 @@ from langchain_writer import ChatWriter
|
|
|
16
17
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
17
18
|
from langchain_core.agents import AgentAction, AgentStep
|
|
18
19
|
from langchain_core.callbacks.base import BaseCallbackHandler
|
|
20
|
+
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
|
|
19
21
|
|
|
20
22
|
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
|
21
23
|
from langchain_core.messages.base import BaseMessage
|
|
@@ -63,7 +65,6 @@ from mindsdb.interfaces.agents.constants import (
|
|
|
63
65
|
)
|
|
64
66
|
from mindsdb.interfaces.skills.skill_tool import skill_tool, SkillData
|
|
65
67
|
from langchain_anthropic import ChatAnthropic
|
|
66
|
-
from langchain_core.messages import SystemMessage
|
|
67
68
|
from langchain_openai import ChatOpenAI
|
|
68
69
|
|
|
69
70
|
from mindsdb.utilities.langfuse import LangfuseClientWrapper
|
|
@@ -297,6 +298,11 @@ class LangchainAgent:
|
|
|
297
298
|
if "prompt_template" in args:
|
|
298
299
|
logger.info(f"Using prompt template: {args['prompt_template'][:50]}...")
|
|
299
300
|
|
|
301
|
+
if "model_name" not in args:
|
|
302
|
+
raise ValueError(
|
|
303
|
+
"No model name provided for agent. Provide it in the model parameter or in the default model setup."
|
|
304
|
+
)
|
|
305
|
+
|
|
300
306
|
return args
|
|
301
307
|
|
|
302
308
|
def get_metadata(self) -> Dict:
|
|
@@ -345,15 +351,20 @@ class LangchainAgent:
|
|
|
345
351
|
args.update(params or {})
|
|
346
352
|
|
|
347
353
|
df = pd.DataFrame(messages)
|
|
354
|
+
logger.info(f"LangchainAgent.get_completion: Received {len(messages)} messages")
|
|
355
|
+
if logger.isEnabledFor(logging.DEBUG):
|
|
356
|
+
logger.debug(f"Messages DataFrame shape: {df.shape}")
|
|
357
|
+
logger.debug(f"Messages DataFrame columns: {df.columns.tolist()}")
|
|
358
|
+
logger.debug(f"Messages DataFrame content: {df.to_dict('records')}")
|
|
348
359
|
|
|
349
360
|
# Back compatibility for old models
|
|
350
361
|
self.provider = args.get("provider", get_llm_provider(args))
|
|
351
362
|
|
|
352
363
|
df = df.reset_index(drop=True)
|
|
353
364
|
agent = self.create_agent(df)
|
|
354
|
-
#
|
|
355
|
-
|
|
356
|
-
|
|
365
|
+
# Keep conversation history for context - don't nullify previous messages
|
|
366
|
+
|
|
367
|
+
# Only use the last message as the current prompt, but preserve history for agent memory
|
|
357
368
|
response = self.run_agent(df, agent, args)
|
|
358
369
|
|
|
359
370
|
# End the run completion span and update the metadata with tool usage
|
|
@@ -374,6 +385,12 @@ class LangchainAgent:
|
|
|
374
385
|
args = self.args
|
|
375
386
|
|
|
376
387
|
df = pd.DataFrame(messages)
|
|
388
|
+
logger.info(f"LangchainAgent._get_completion_stream: Received {len(messages)} messages")
|
|
389
|
+
# Check if we have the expected columns for conversation history
|
|
390
|
+
if "question" in df.columns and "answer" in df.columns:
|
|
391
|
+
logger.debug("DataFrame has question/answer columns for conversation history")
|
|
392
|
+
else:
|
|
393
|
+
logger.warning("DataFrame missing question/answer columns! Available columns: {df.columns.tolist()}")
|
|
377
394
|
|
|
378
395
|
self.embedding_model_provider = args.get("embedding_model_provider", get_embedding_model_provider(args))
|
|
379
396
|
# Back compatibility for old models
|
|
@@ -381,9 +398,8 @@ class LangchainAgent:
|
|
|
381
398
|
|
|
382
399
|
df = df.reset_index(drop=True)
|
|
383
400
|
agent = self.create_agent(df)
|
|
384
|
-
#
|
|
385
|
-
|
|
386
|
-
df.iloc[:-1, df.columns.get_loc(user_column)] = None
|
|
401
|
+
# Keep conversation history for context - don't nullify previous messages
|
|
402
|
+
# Only use the last message as the current prompt, but preserve history for agent memory
|
|
387
403
|
return self.stream_agent(df, agent, args)
|
|
388
404
|
|
|
389
405
|
def create_agent(self, df: pd.DataFrame) -> AgentExecutor:
|
|
@@ -403,7 +419,8 @@ class LangchainAgent:
|
|
|
403
419
|
# Prefer prediction prompt template over original if provided.
|
|
404
420
|
prompt_template = args["prompt_template"]
|
|
405
421
|
|
|
406
|
-
#
|
|
422
|
+
# Modern LangChain approach: Use memory but populate it correctly
|
|
423
|
+
# Create memory and populate with conversation history
|
|
407
424
|
memory = ConversationSummaryBufferMemory(
|
|
408
425
|
llm=llm,
|
|
409
426
|
input_key="input",
|
|
@@ -412,17 +429,41 @@ class LangchainAgent:
|
|
|
412
429
|
memory_key="chat_history",
|
|
413
430
|
)
|
|
414
431
|
|
|
432
|
+
# Add system message first
|
|
415
433
|
memory.chat_memory.messages.insert(0, SystemMessage(content=prompt_template))
|
|
416
|
-
|
|
434
|
+
|
|
417
435
|
user_column = args.get("user_column", USER_COLUMN)
|
|
418
436
|
assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
437
|
+
|
|
438
|
+
logger.info(f"Processing conversation history: {len(df)} total messages, {len(df[:-1])} history messages")
|
|
439
|
+
logger.debug(f"User column: {user_column}, Assistant column: {assistant_column}")
|
|
440
|
+
|
|
441
|
+
# Process history messages (all except the last one which is current message)
|
|
442
|
+
history_df = df[:-1]
|
|
443
|
+
if len(history_df) == 0:
|
|
444
|
+
logger.debug("No history rows to process - this is normal for first message")
|
|
445
|
+
|
|
446
|
+
history_count = 0
|
|
447
|
+
for i, row in enumerate(history_df.to_dict("records")):
|
|
448
|
+
question = row.get(user_column)
|
|
449
|
+
answer = row.get(assistant_column)
|
|
450
|
+
logger.debug(f"Converting history row {i}: question='{question}', answer='{answer}'")
|
|
451
|
+
|
|
452
|
+
# Add messages directly to memory's chat_memory.messages list (modern approach)
|
|
422
453
|
if isinstance(question, str) and len(question) > 0:
|
|
423
|
-
memory.chat_memory.
|
|
454
|
+
memory.chat_memory.messages.append(HumanMessage(content=question))
|
|
455
|
+
history_count += 1
|
|
456
|
+
logger.debug(f"Added HumanMessage to memory: {question}")
|
|
424
457
|
if isinstance(answer, str) and len(answer) > 0:
|
|
425
|
-
memory.chat_memory.
|
|
458
|
+
memory.chat_memory.messages.append(AIMessage(content=answer))
|
|
459
|
+
history_count += 1
|
|
460
|
+
logger.debug(f"Added AIMessage to memory: {answer}")
|
|
461
|
+
|
|
462
|
+
logger.info(f"Built conversation history with {history_count} history messages + system message")
|
|
463
|
+
logger.debug(f"Final memory messages count: {len(memory.chat_memory.messages)}")
|
|
464
|
+
|
|
465
|
+
# Store memory for agent use
|
|
466
|
+
self._conversation_memory = memory
|
|
426
467
|
|
|
427
468
|
agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
|
|
428
469
|
agent_executor = initialize_agent(
|
|
@@ -562,7 +603,22 @@ AI: {response}"""
|
|
|
562
603
|
return {CONTEXT_COLUMN: [], ASSISTANT_COLUMN: ""}
|
|
563
604
|
try:
|
|
564
605
|
callbacks, context_callback = prepare_callbacks(self, args)
|
|
565
|
-
|
|
606
|
+
|
|
607
|
+
# Modern LangChain approach: Include conversation history + current message
|
|
608
|
+
if hasattr(self, "_conversation_messages") and self._conversation_messages:
|
|
609
|
+
# Add current user message to conversation history
|
|
610
|
+
full_messages = self._conversation_messages + [HumanMessage(content=prompt)]
|
|
611
|
+
logger.critical(f"🔍 INVOKING AGENT with {len(full_messages)} messages (including history)")
|
|
612
|
+
logger.debug(
|
|
613
|
+
f"Full conversation messages: {[type(msg).__name__ + ': ' + msg.content[:100] + '...' for msg in full_messages]}"
|
|
614
|
+
)
|
|
615
|
+
|
|
616
|
+
# For agents, we need to pass the input in the expected format
|
|
617
|
+
# The agent expects 'input' key with the current question, but conversation history should be in memory
|
|
618
|
+
result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
|
|
619
|
+
else:
|
|
620
|
+
logger.warning("No conversation messages found - using simple prompt")
|
|
621
|
+
result = agent_executor.invoke({"input": prompt}, config={"callbacks": callbacks})
|
|
566
622
|
captured_context = context_callback.get_contexts()
|
|
567
623
|
output = result["output"] if isinstance(result, dict) and "output" in result else str(result)
|
|
568
624
|
return {CONTEXT_COLUMN: captured_context, ASSISTANT_COLUMN: output}
|
|
@@ -585,7 +641,14 @@ AI: {response}"""
|
|
|
585
641
|
agent_timeout_seconds = args.get("timeout", DEFAULT_AGENT_TIMEOUT_SECONDS)
|
|
586
642
|
|
|
587
643
|
with ContextThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
588
|
-
|
|
644
|
+
# Only process the last prompt (current question), not all prompts
|
|
645
|
+
# The previous prompts are conversation history and should only be used for context
|
|
646
|
+
if prompts:
|
|
647
|
+
current_prompt = prompts[-1] # Last prompt is the current question
|
|
648
|
+
futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, current_prompt)]
|
|
649
|
+
else:
|
|
650
|
+
logger.error("No prompts found to process")
|
|
651
|
+
futures = []
|
|
589
652
|
try:
|
|
590
653
|
for future in as_completed(futures, timeout=agent_timeout_seconds):
|
|
591
654
|
result = future.result()
|
|
@@ -686,12 +749,14 @@ AI: {response}"""
|
|
|
686
749
|
|
|
687
750
|
callbacks, context_callback = prepare_callbacks(self, args)
|
|
688
751
|
|
|
689
|
-
|
|
752
|
+
# Use last prompt (current question) instead of first prompt (history)
|
|
753
|
+
current_prompt = prompts[-1] if prompts else ""
|
|
754
|
+
yield self.add_chunk_metadata({"type": "start", "prompt": current_prompt})
|
|
690
755
|
|
|
691
756
|
if not hasattr(agent_executor, "stream") or not callable(agent_executor.stream):
|
|
692
757
|
raise AttributeError("The agent_executor does not have a 'stream' method")
|
|
693
758
|
|
|
694
|
-
stream_iterator = self._stream_agent_executor(agent_executor,
|
|
759
|
+
stream_iterator = self._stream_agent_executor(agent_executor, current_prompt, callbacks)
|
|
695
760
|
for chunk in stream_iterator:
|
|
696
761
|
yield chunk
|
|
697
762
|
|
|
@@ -29,7 +29,7 @@ from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embeddi
|
|
|
29
29
|
construct_model_from_args,
|
|
30
30
|
)
|
|
31
31
|
|
|
32
|
-
from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
|
|
32
|
+
from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS, MAX_INSERT_BATCH_SIZE
|
|
33
33
|
from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
|
|
34
34
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
35
35
|
from mindsdb.interfaces.variables.variables_controller import variables_controller
|
|
@@ -493,6 +493,8 @@ class KnowledgeBaseTable:
|
|
|
493
493
|
"""Process and insert raw data rows"""
|
|
494
494
|
if not rows:
|
|
495
495
|
return
|
|
496
|
+
if len(rows) > MAX_INSERT_BATCH_SIZE:
|
|
497
|
+
raise ValueError("Input data is too large, please load data in batches")
|
|
496
498
|
|
|
497
499
|
df = pd.DataFrame(rows)
|
|
498
500
|
|
|
@@ -15,6 +15,7 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
18
|
+
include_tables_tools: bool = True
|
|
18
19
|
include_knowledge_base_tools: bool = True
|
|
19
20
|
|
|
20
21
|
def get_tools(self, prefix="") -> List[BaseTool]:
|
|
@@ -212,8 +213,13 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
212
213
|
)
|
|
213
214
|
|
|
214
215
|
# Return standard SQL tools and knowledge base tools
|
|
215
|
-
|
|
216
|
+
kb_tools = [
|
|
216
217
|
kb_list_tool,
|
|
217
218
|
kb_info_tool,
|
|
218
219
|
kb_query_tool,
|
|
219
220
|
]
|
|
221
|
+
|
|
222
|
+
if not self.include_tables_tools:
|
|
223
|
+
return kb_tools
|
|
224
|
+
else:
|
|
225
|
+
return sql_tools + kb_tools
|
|
@@ -347,7 +347,13 @@ class SkillToolController:
|
|
|
347
347
|
)
|
|
348
348
|
db = MindsDBSQL.custom_init(sql_agent=sql_agent)
|
|
349
349
|
should_include_kb_tools = include_knowledge_bases is not None and len(include_knowledge_bases) > 0
|
|
350
|
-
|
|
350
|
+
should_include_tables_tools = len(databases_struct) > 0 or len(tables_list) > 0
|
|
351
|
+
toolkit = MindsDBSQLToolkit(
|
|
352
|
+
db=db,
|
|
353
|
+
llm=llm,
|
|
354
|
+
include_tables_tools=should_include_tables_tools,
|
|
355
|
+
include_knowledge_base_tools=should_include_kb_tools,
|
|
356
|
+
)
|
|
351
357
|
return toolkit.get_tools()
|
|
352
358
|
|
|
353
359
|
def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
|
|
@@ -405,6 +405,7 @@ class SQLAgent:
|
|
|
405
405
|
tables_idx[tuple(table.parts)] = table
|
|
406
406
|
|
|
407
407
|
tables = []
|
|
408
|
+
not_found = []
|
|
408
409
|
for table_name in table_names:
|
|
409
410
|
if not table_name.strip():
|
|
410
411
|
continue
|
|
@@ -419,9 +420,12 @@ class SQLAgent:
|
|
|
419
420
|
table_identifier = tables_idx.get(tuple(table_parts))
|
|
420
421
|
|
|
421
422
|
if table_identifier is None:
|
|
422
|
-
|
|
423
|
-
|
|
423
|
+
not_found.append(table_name)
|
|
424
|
+
else:
|
|
425
|
+
tables.append(table_identifier)
|
|
424
426
|
|
|
427
|
+
if not_found:
|
|
428
|
+
raise ValueError(f"Tables: {', '.join(not_found)} not found in the database")
|
|
425
429
|
return tables
|
|
426
430
|
|
|
427
431
|
def get_knowledge_base_info(self, kb_names: Optional[List[str]] = None) -> str:
|
mindsdb/utilities/config.py
CHANGED
|
@@ -3,13 +3,14 @@ import sys
|
|
|
3
3
|
import json
|
|
4
4
|
import argparse
|
|
5
5
|
import datetime
|
|
6
|
+
import logging
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from copy import deepcopy
|
|
8
|
-
import multiprocessing as mp
|
|
9
9
|
|
|
10
10
|
from appdirs import user_data_dir
|
|
11
11
|
|
|
12
12
|
# NOTE do not `import from mindsdb` here
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
def _merge_key_recursive(target_dict, source_dict, key):
|
|
@@ -171,14 +172,6 @@ class Config:
|
|
|
171
172
|
"restart_on_failure": True,
|
|
172
173
|
"max_restart_count": 1,
|
|
173
174
|
"max_restart_interval_seconds": 60,
|
|
174
|
-
"server": {
|
|
175
|
-
"type": "waitress", # MINDSDB_HTTP_SERVER_TYPE MINDSDB_DEFAULT_SERVER
|
|
176
|
-
"config": {
|
|
177
|
-
"threads": 16,
|
|
178
|
-
"max_request_body_size": (1 << 30) * 10, # 10GB
|
|
179
|
-
"inbuf_overflow": (1 << 30) * 10,
|
|
180
|
-
},
|
|
181
|
-
},
|
|
182
175
|
},
|
|
183
176
|
"mysql": {
|
|
184
177
|
"host": api_host,
|
|
@@ -189,29 +182,11 @@ class Config:
|
|
|
189
182
|
"max_restart_count": 1,
|
|
190
183
|
"max_restart_interval_seconds": 60,
|
|
191
184
|
},
|
|
192
|
-
"mongodb": {"host": api_host, "port": "47336", "database": "mindsdb"},
|
|
193
185
|
"postgres": {"host": api_host, "port": "55432", "database": "mindsdb"},
|
|
194
|
-
"mcp": {
|
|
195
|
-
"host": api_host,
|
|
196
|
-
"port": "47337",
|
|
197
|
-
"enabled": True,
|
|
198
|
-
"restart_on_failure": True,
|
|
199
|
-
"max_restart_count": 1,
|
|
200
|
-
"max_restart_interval_seconds": 60,
|
|
201
|
-
},
|
|
202
186
|
"litellm": {
|
|
203
187
|
"host": "0.0.0.0", # API server binds to all interfaces by default
|
|
204
188
|
"port": "8000",
|
|
205
189
|
},
|
|
206
|
-
"a2a": {
|
|
207
|
-
"host": api_host,
|
|
208
|
-
"port": 47338,
|
|
209
|
-
"mindsdb_host": "localhost",
|
|
210
|
-
"mindsdb_port": 47334,
|
|
211
|
-
"agent_name": "my_agent",
|
|
212
|
-
"project_name": "mindsdb",
|
|
213
|
-
"enabled": False,
|
|
214
|
-
},
|
|
215
190
|
},
|
|
216
191
|
"cache": {"type": "local"},
|
|
217
192
|
"ml_task_queue": {"type": "local"},
|
|
@@ -250,7 +225,7 @@ class Config:
|
|
|
250
225
|
"""Collect config values from env vars to self._env_config"""
|
|
251
226
|
self._env_config = {
|
|
252
227
|
"logging": {"handlers": {"console": {}, "file": {}}},
|
|
253
|
-
"api": {"http": {
|
|
228
|
+
"api": {"http": {}},
|
|
254
229
|
"auth": {},
|
|
255
230
|
"paths": {},
|
|
256
231
|
"permanent_storage": {},
|
|
@@ -298,21 +273,6 @@ class Config:
|
|
|
298
273
|
self._env_config["auth"]["password"] = http_password
|
|
299
274
|
# endregion
|
|
300
275
|
|
|
301
|
-
# region permanent session lifetime
|
|
302
|
-
for env_name in (
|
|
303
|
-
"MINDSDB_HTTP_PERMANENT_SESSION_LIFETIME",
|
|
304
|
-
"FLASK_PERMANENT_SESSION_LIFETIME",
|
|
305
|
-
):
|
|
306
|
-
env_value = os.environ.get(env_name)
|
|
307
|
-
if isinstance(env_value, str):
|
|
308
|
-
try:
|
|
309
|
-
permanent_session_lifetime = int(env_value)
|
|
310
|
-
except Exception:
|
|
311
|
-
raise ValueError(f"Warning: Can't cast env var {env_name} value to int: {env_value}")
|
|
312
|
-
self._env_config["auth"]["http_permanent_session_lifetime"] = permanent_session_lifetime
|
|
313
|
-
break
|
|
314
|
-
# endregion
|
|
315
|
-
|
|
316
276
|
# region logging
|
|
317
277
|
if os.environ.get("MINDSDB_LOG_LEVEL", "") != "":
|
|
318
278
|
self._env_config["logging"]["handlers"]["console"]["level"] = os.environ["MINDSDB_LOG_LEVEL"]
|
|
@@ -325,35 +285,6 @@ class Config:
|
|
|
325
285
|
self._env_config["logging"]["handlers"]["file"]["enabled"] = True
|
|
326
286
|
# endregion
|
|
327
287
|
|
|
328
|
-
# region server type
|
|
329
|
-
server_type = os.environ.get("MINDSDB_HTTP_SERVER_TYPE", "").lower()
|
|
330
|
-
if server_type == "":
|
|
331
|
-
server_type = os.environ.get("MINDSDB_DEFAULT_SERVER", "").lower()
|
|
332
|
-
if server_type != "":
|
|
333
|
-
if server_type == "waitress":
|
|
334
|
-
self._env_config["api"]["http"]["server"]["type"] = "waitress"
|
|
335
|
-
self._default_config["api"]["http"]["server"]["config"] = {}
|
|
336
|
-
self._env_config["api"]["http"]["server"]["config"] = {
|
|
337
|
-
"threads": 16,
|
|
338
|
-
"max_request_body_size": (1 << 30) * 10, # 10GB
|
|
339
|
-
"inbuf_overflow": (1 << 30) * 10,
|
|
340
|
-
}
|
|
341
|
-
elif server_type == "flask":
|
|
342
|
-
self._env_config["api"]["http"]["server"]["type"] = "flask"
|
|
343
|
-
self._default_config["api"]["http"]["server"]["config"] = {}
|
|
344
|
-
self._env_config["api"]["http"]["server"]["config"] = {}
|
|
345
|
-
elif server_type == "gunicorn":
|
|
346
|
-
self._env_config["api"]["http"]["server"]["type"] = "gunicorn"
|
|
347
|
-
self._default_config["api"]["http"]["server"]["config"] = {}
|
|
348
|
-
self._env_config["api"]["http"]["server"]["config"] = {
|
|
349
|
-
"workers": min(mp.cpu_count(), 4),
|
|
350
|
-
"timeout": 600,
|
|
351
|
-
"reuse_port": True,
|
|
352
|
-
"preload_app": True,
|
|
353
|
-
"threads": 4,
|
|
354
|
-
}
|
|
355
|
-
# endregion
|
|
356
|
-
|
|
357
288
|
if os.environ.get("MINDSDB_DB_CON", "") != "":
|
|
358
289
|
self._env_config["storage_db"] = os.environ["MINDSDB_DB_CON"]
|
|
359
290
|
|
|
@@ -373,32 +304,6 @@ class Config:
|
|
|
373
304
|
if os.environ.get("MINDSDB_DATA_CATALOG_ENABLED", "").lower() in ("1", "true"):
|
|
374
305
|
self._env_config["data_catalog"] = {"enabled": True}
|
|
375
306
|
|
|
376
|
-
# region vars: a2a configuration
|
|
377
|
-
a2a_config = {}
|
|
378
|
-
if os.environ.get("MINDSDB_A2A_HOST"):
|
|
379
|
-
a2a_config["host"] = os.environ.get("MINDSDB_A2A_HOST")
|
|
380
|
-
if os.environ.get("MINDSDB_A2A_PORT"):
|
|
381
|
-
a2a_config["port"] = int(os.environ.get("MINDSDB_A2A_PORT"))
|
|
382
|
-
if os.environ.get("MINDSDB_HOST"):
|
|
383
|
-
a2a_config["mindsdb_host"] = os.environ.get("MINDSDB_HOST")
|
|
384
|
-
if os.environ.get("MINDSDB_PORT"):
|
|
385
|
-
a2a_config["mindsdb_port"] = int(os.environ.get("MINDSDB_PORT"))
|
|
386
|
-
if os.environ.get("MINDSDB_AGENT_NAME"):
|
|
387
|
-
a2a_config["agent_name"] = os.environ.get("MINDSDB_AGENT_NAME")
|
|
388
|
-
if os.environ.get("MINDSDB_PROJECT_NAME"):
|
|
389
|
-
a2a_config["project_name"] = os.environ.get("MINDSDB_PROJECT_NAME")
|
|
390
|
-
if os.environ.get("MINDSDB_A2A_ENABLED") is not None:
|
|
391
|
-
a2a_config["enabled"] = os.environ.get("MINDSDB_A2A_ENABLED").lower() in (
|
|
392
|
-
"true",
|
|
393
|
-
"1",
|
|
394
|
-
"yes",
|
|
395
|
-
"y",
|
|
396
|
-
)
|
|
397
|
-
|
|
398
|
-
if a2a_config:
|
|
399
|
-
self._env_config["api"]["a2a"] = a2a_config
|
|
400
|
-
# endregion
|
|
401
|
-
|
|
402
307
|
def fetch_auto_config(self) -> bool:
|
|
403
308
|
"""Load dict readed from config.auto.json to `auto_config`.
|
|
404
309
|
Do it only if `auto_config` was not loaded before or config.auto.json been changed.
|
|
@@ -462,38 +367,6 @@ class Config:
|
|
|
462
367
|
_merge_configs(new_config, self._auto_config or {})
|
|
463
368
|
_merge_configs(new_config, self._env_config or {})
|
|
464
369
|
|
|
465
|
-
# Apply command-line arguments for A2A
|
|
466
|
-
a2a_config = {}
|
|
467
|
-
|
|
468
|
-
# Check for A2A command-line arguments
|
|
469
|
-
if hasattr(self.cmd_args, "a2a_host") and self.cmd_args.a2a_host is not None:
|
|
470
|
-
a2a_config["host"] = self.cmd_args.a2a_host
|
|
471
|
-
|
|
472
|
-
if hasattr(self.cmd_args, "a2a_port") and self.cmd_args.a2a_port is not None:
|
|
473
|
-
a2a_config["port"] = self.cmd_args.a2a_port
|
|
474
|
-
|
|
475
|
-
if hasattr(self.cmd_args, "mindsdb_host") and self.cmd_args.mindsdb_host is not None:
|
|
476
|
-
a2a_config["mindsdb_host"] = self.cmd_args.mindsdb_host
|
|
477
|
-
|
|
478
|
-
if hasattr(self.cmd_args, "mindsdb_port") and self.cmd_args.mindsdb_port is not None:
|
|
479
|
-
a2a_config["mindsdb_port"] = self.cmd_args.mindsdb_port
|
|
480
|
-
|
|
481
|
-
if hasattr(self.cmd_args, "agent_name") and self.cmd_args.agent_name is not None:
|
|
482
|
-
a2a_config["agent_name"] = self.cmd_args.agent_name
|
|
483
|
-
|
|
484
|
-
if hasattr(self.cmd_args, "project_name") and self.cmd_args.project_name is not None:
|
|
485
|
-
a2a_config["project_name"] = self.cmd_args.project_name
|
|
486
|
-
|
|
487
|
-
# Merge command-line args config with highest priority
|
|
488
|
-
if a2a_config:
|
|
489
|
-
_merge_configs(new_config, {"api": {"a2a": a2a_config}})
|
|
490
|
-
|
|
491
|
-
# Ensure A2A port is never 0, which would prevent the A2A API from starting
|
|
492
|
-
a2a_config = new_config["api"].get("a2a")
|
|
493
|
-
if a2a_config is not None and isinstance(a2a_config, dict):
|
|
494
|
-
if "port" in a2a_config and (a2a_config["port"] == 0 or a2a_config["port"] is None):
|
|
495
|
-
a2a_config["port"] = 47338 # Use the default port value
|
|
496
|
-
|
|
497
370
|
# region create dirs
|
|
498
371
|
for key, value in new_config["paths"].items():
|
|
499
372
|
if isinstance(value, str):
|
|
@@ -550,12 +423,6 @@ class Config:
|
|
|
550
423
|
if "log" in self._config:
|
|
551
424
|
logger.warning("The 'log' config option is no longer supported. Use 'logging' instead.")
|
|
552
425
|
|
|
553
|
-
if os.environ.get("MINDSDB_DEFAULT_SERVER", "") != "":
|
|
554
|
-
logger.warning(
|
|
555
|
-
"Env variable 'MINDSDB_DEFAULT_SERVER' is going to be deprecated soon. "
|
|
556
|
-
"Use 'MINDSDB_HTTP_SERVER_TYPE' instead."
|
|
557
|
-
)
|
|
558
|
-
|
|
559
426
|
file_upload_domains = self._config.get("file_upload_domains")
|
|
560
427
|
if isinstance(file_upload_domains, list) and len(file_upload_domains) > 0:
|
|
561
428
|
allowed_origins = self._config["url_file_upload"]["allowed_origins"]
|
|
@@ -566,14 +433,6 @@ class Config:
|
|
|
566
433
|
'use config["url_file_upload"]["allowed_origins"] instead.'
|
|
567
434
|
)
|
|
568
435
|
|
|
569
|
-
for env_name in ("MINDSDB_HTTP_SERVER_TYPE", "MINDSDB_DEFAULT_SERVER"):
|
|
570
|
-
env_value = os.environ.get(env_name, "")
|
|
571
|
-
if env_value.lower() not in ("waitress", "flask", "gunicorn", ""):
|
|
572
|
-
logger.warning(
|
|
573
|
-
f"The value '{env_value}' of the environment variable {env_name} is not valid. "
|
|
574
|
-
"It must be one of the following: 'waitress', 'flask', or 'gunicorn'."
|
|
575
|
-
)
|
|
576
|
-
|
|
577
436
|
@property
|
|
578
437
|
def cmd_args(self):
|
|
579
438
|
if self._cmd_args is None:
|
|
@@ -624,17 +483,6 @@ class Config:
|
|
|
624
483
|
help="Project containing the agent (default: mindsdb)",
|
|
625
484
|
)
|
|
626
485
|
|
|
627
|
-
# A2A specific arguments
|
|
628
|
-
parser.add_argument("--a2a-host", type=str, default=None, help="A2A server host")
|
|
629
|
-
parser.add_argument("--a2a-port", type=int, default=None, help="A2A server port")
|
|
630
|
-
parser.add_argument("--mindsdb-host", type=str, default=None, help="MindsDB server host")
|
|
631
|
-
parser.add_argument("--mindsdb-port", type=int, default=None, help="MindsDB server port")
|
|
632
|
-
parser.add_argument(
|
|
633
|
-
"--agent-name",
|
|
634
|
-
type=str,
|
|
635
|
-
default=None,
|
|
636
|
-
help="MindsDB agent name to connect to",
|
|
637
|
-
)
|
|
638
486
|
parser.add_argument("--project-name", type=str, default=None, help="MindsDB project name")
|
|
639
487
|
parser.add_argument("--update-gui", action="store_true", default=False, help="Update GUI and exit")
|
|
640
488
|
|
mindsdb/utilities/fs.py
CHANGED
|
@@ -133,6 +133,9 @@ def create_pid_file():
|
|
|
133
133
|
Create mindsdb process pid file. Check if previous process exists and is running
|
|
134
134
|
"""
|
|
135
135
|
|
|
136
|
+
if os.environ.get("USE_PIDFILE") != "1":
|
|
137
|
+
return
|
|
138
|
+
|
|
136
139
|
p = get_tmp_dir()
|
|
137
140
|
p.mkdir(parents=True, exist_ok=True)
|
|
138
141
|
pid_file = p.joinpath("pid")
|
|
@@ -141,11 +144,11 @@ def create_pid_file():
|
|
|
141
144
|
pid = pid_file.read_text().strip()
|
|
142
145
|
try:
|
|
143
146
|
psutil.Process(int(pid))
|
|
144
|
-
raise Exception(f"Found PID file with existing process: {pid}")
|
|
147
|
+
raise Exception(f"Found PID file with existing process: {pid} {pid_file}")
|
|
145
148
|
except (psutil.Error, ValueError):
|
|
146
149
|
...
|
|
147
150
|
|
|
148
|
-
logger.warning(f"Found existing PID file ({pid}), removing")
|
|
151
|
+
logger.warning(f"Found existing PID file {pid_file}({pid}), removing")
|
|
149
152
|
pid_file.unlink()
|
|
150
153
|
|
|
151
154
|
pid_file.write_text(str(os.getpid()))
|
|
@@ -155,15 +158,18 @@ def delete_pid_file():
|
|
|
155
158
|
"""
|
|
156
159
|
Remove existing process pid file if it matches current process
|
|
157
160
|
"""
|
|
161
|
+
|
|
162
|
+
if os.environ.get("USE_PIDFILE") != "1":
|
|
163
|
+
return
|
|
164
|
+
|
|
158
165
|
pid_file = get_tmp_dir().joinpath("pid")
|
|
159
166
|
|
|
160
167
|
if not pid_file.exists():
|
|
161
|
-
logger.warning("Mindsdb PID file does not exist")
|
|
162
168
|
return
|
|
163
169
|
|
|
164
170
|
pid = pid_file.read_text().strip()
|
|
165
171
|
if pid != str(os.getpid()):
|
|
166
|
-
logger.warning("Process id in PID file doesn't match mindsdb pid")
|
|
172
|
+
logger.warning(f"Process id in PID file ({pid_file}) doesn't match mindsdb pid")
|
|
167
173
|
return
|
|
168
174
|
|
|
169
175
|
pid_file.unlink()
|
mindsdb/utilities/log.py
CHANGED
|
@@ -99,31 +99,6 @@ def get_handlers_config(process_name: str) -> dict:
|
|
|
99
99
|
return handlers_config
|
|
100
100
|
|
|
101
101
|
|
|
102
|
-
def get_uvicorn_logging_config(process_name: str) -> dict:
|
|
103
|
-
"""Generate a logging configuration dictionary for Uvicorn using MindsDB's logging settings.
|
|
104
|
-
|
|
105
|
-
Args:
|
|
106
|
-
process_name (str): The name of the process to include in log file names and handlers.
|
|
107
|
-
|
|
108
|
-
Returns:
|
|
109
|
-
dict: A dictionary suitable for use with logging.config.dictConfig, configured for Uvicorn logging.
|
|
110
|
-
"""
|
|
111
|
-
handlers_config = get_handlers_config(process_name)
|
|
112
|
-
mindsdb_log_level = get_mindsdb_log_level()
|
|
113
|
-
return {
|
|
114
|
-
"version": 1,
|
|
115
|
-
"formatters": FORMATTERS,
|
|
116
|
-
"handlers": handlers_config,
|
|
117
|
-
"loggers": {
|
|
118
|
-
"uvicorn": {
|
|
119
|
-
"handlers": list(handlers_config.keys()),
|
|
120
|
-
"level": mindsdb_log_level,
|
|
121
|
-
"propagate": False,
|
|
122
|
-
}
|
|
123
|
-
},
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
|
|
127
102
|
def configure_logging(process_name: str = None):
|
|
128
103
|
handlers_config = get_handlers_config(process_name)
|
|
129
104
|
mindsdb_log_level = get_mindsdb_log_level()
|