MindsDB 25.7.3.0-py3-none-any.whl → 25.7.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +206 -135
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/http/initialize.py +16 -43
- mindsdb/api/http/namespaces/agents.py +23 -20
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -76
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/salesforce_handler/constants.py +208 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/interfaces/agents/agents_controller.py +167 -108
- mindsdb/interfaces/agents/langchain_agent.py +10 -3
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +63 -16
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +26 -5
- mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/query_context/context_controller.py +100 -133
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +3 -3
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/METADATA +246 -247
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/RECORD +61 -60
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/top_level.txt +0 -0
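The per-file listing above can be reproduced locally using only the Python standard library. Below is a minimal sketch, assuming both `.whl` files have already been downloaded next to the script (for example with `pip download mindsdb==<version> --no-deps`); the file paths and the binary-member handling are illustrative and not part of this release.

```python
# Minimal sketch: compare two locally downloaded wheels member-by-member.
# The wheel paths below are assumptions; adjust them to wherever the files live.
import difflib
import zipfile

OLD_WHEEL = "mindsdb-25.7.3.0-py3-none-any.whl"  # assumed local path
NEW_WHEEL = "mindsdb-25.7.4.0-py3-none-any.whl"  # assumed local path


def read_wheel(path: str) -> dict:
    """Map each member of the wheel (a zip archive) to its decoded lines, or None if binary."""
    members = {}
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            data = zf.read(name)
            try:
                members[name] = data.decode("utf-8").splitlines(keepends=True)
            except UnicodeDecodeError:
                members[name] = None  # skip binary members from the text diff
    return members


old_files = read_wheel(OLD_WHEEL)
new_files = read_wheel(NEW_WHEEL)

for name in sorted(set(old_files) | set(new_files)):
    a = old_files.get(name) or []
    b = new_files.get(name) or []
    if a == b:
        continue
    # One unified-diff stream per changed member, similar to the listing that follows.
    for line in difflib.unified_diff(a, b, fromfile=name, tofile=name):
        print(line, end="")
```

Note that the renamed `*.dist-info` directories will not line up by name in such a naive comparison, so they appear as whole-file additions and removals.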

mindsdb/integrations/handlers/langchain_handler/langchain_handler.py

@@ -13,7 +13,10 @@ import pandas as pd
 
 from mindsdb.interfaces.agents.safe_output_parser import SafeOutputParser
 from mindsdb.interfaces.agents.langchain_agent import (
-    get_llm_provider,
+    get_llm_provider,
+    get_embedding_model_provider,
+    create_chat_model,
+    get_chat_model_params,
 )
 
 from mindsdb.interfaces.agents.constants import (
@@ -24,19 +27,21 @@ from mindsdb.interfaces.agents.constants import (
     DEFAULT_MAX_TOKENS,
     DEFAULT_MODEL_NAME,
     USER_COLUMN,
-    ASSISTANT_COLUMN
+    ASSISTANT_COLUMN,
 )
 from mindsdb.integrations.utilities.rag.settings import DEFAULT_RAG_PROMPT_TEMPLATE
 from mindsdb.integrations.handlers.langchain_handler.tools import setup_tools
 from mindsdb.integrations.libs.base import BaseMLEngine
 from mindsdb.interfaces.storage.model_fs import HandlerStorage, ModelStorage
-from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import
-
+from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
+    construct_model_from_args,
+)
+from mindsdb.integrations.handlers.openai_handler.constants import CHAT_MODELS # noqa: F401 - for dependency checker
 
 from mindsdb.utilities import log
 from mindsdb.utilities.context_executor import ContextThreadPoolExecutor
 
-_PARSING_ERROR_PREFIXES = [
+_PARSING_ERROR_PREFIXES = ["An output parsing error occured", "Could not parse LLM output"]
 
 logger = log.getLogger(__name__)
 
@@ -58,13 +63,10 @@ class LangChainHandler(BaseMLEngine):
     - python_repl
     - serper.dev search
     """
-    name = 'langchain'
 
-
-
-
-                 engine_storage: HandlerStorage,
-                 **kwargs):
+    name = "langchain"
+
+    def __init__(self, model_storage: ModelStorage, engine_storage: HandlerStorage, **kwargs):
         super().__init__(model_storage, engine_storage, **kwargs)
         # if True, the target column name does not have to be specified at creation time.
         self.generative = True
@@ -81,77 +83,78 @@ class LangChainHandler(BaseMLEngine):
         #
         # Ideally, in the future, we would write a parser that is more robust and flexible than the one Langchain uses.
         # Response is wrapped in ``
-        logger.info(
-        response_output = response.split(
+        logger.info("Handling parsing error, salvaging response...")
+        response_output = response.split("`")
         if len(response_output) >= 2:
             response = response_output[-2]
 
         # Wrap response in Langchain conversational react format.
-        langchain_react_formatted_response = f
-AI: {response}
+        langchain_react_formatted_response = f"""Thought: Do I need to use a tool? No
+AI: {response}"""
         return langchain_react_formatted_response
-        return f
+        return f"Agent failed with error:\n{str(error)}..."
 
     def create(self, target: str, args: Dict = None, **kwargs):
-        self.default_agent_tools = args.get(
-
-        args = args[
-        args[
-        args[
-        args[
-        args[
-        if args.get(
+        self.default_agent_tools = args.get("tools", self.default_agent_tools)
+
+        args = args["using"]
+        args["target"] = target
+        args["model_name"] = args.get("model_name", DEFAULT_MODEL_NAME)
+        args["provider"] = args.get("provider", get_llm_provider(args))
+        args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))
+        if args.get("mode") == "retrieval":
             # use default prompt template for retrieval i.e. RAG if not provided
             if "prompt_template" not in args:
                 args["prompt_template"] = DEFAULT_RAG_PROMPT_TEMPLATE
 
-        self.model_storage.json_set(
+        self.model_storage.json_set("args", args)
 
     @staticmethod
     def create_validation(_, args: Dict = None, **kwargs):
-        if
+        if "using" not in args:
             raise Exception("LangChain engine requires a USING clause! Refer to its documentation for more details.")
         else:
-            args = args[
-        if
-            if not args.get(
-                raise ValueError(
+            args = args["using"]
+        if "prompt_template" not in args:
+            if not args.get("mode") == "retrieval":
+                raise ValueError("Please provide a `prompt_template` for this engine.")
 
     def predict(self, df: pd.DataFrame, args: Dict = None) -> pd.DataFrame:
         """
         Dispatch is performed depending on the underlying model type. Currently, only the default text completion
         is supported.
         """
-        pred_args = args[
-        args = self.model_storage.json_get(
-        if
+        pred_args = args["predict_params"] if args else {}
+        args = self.model_storage.json_get("args")
+        if "prompt_template" not in args and "prompt_template" not in pred_args:
             raise ValueError("This model expects a `prompt_template`, please provide one.")
         # Back compatibility for old models
-        args[
-        args[
+        args["provider"] = args.get("provider", get_llm_provider(args))
+        args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))
 
         df = df.reset_index(drop=True)
 
-        if pred_args.get(
+        if pred_args.get("mode") == "chat_model":
             return self.call_llm(df, args, pred_args)
 
         agent = self.create_agent(df, args, pred_args)
         # Use last message as prompt, remove other questions.
-        user_column = args.get(
+        user_column = args.get("user_column", USER_COLUMN)
         if user_column not in df.columns:
             raise Exception(
-                f"Expected user input in column `{user_column}`, which is not found in the input data. Either provide the column, or redefine the expected column at model creation (`USING user_column = 'value'`)"
+                f"Expected user input in column `{user_column}`, which is not found in the input data. Either provide the column, or redefine the expected column at model creation (`USING user_column = 'value'`)"
+            ) # noqa
         df.iloc[:-1, df.columns.get_loc(user_column)] = None
         return self.run_agent(df, agent, args, pred_args)
 
     def call_llm(self, df, args=None, pred_args=None):
         llm = create_chat_model({**args, **pred_args})
 
-        user_column = args.get(
-        assistant_column = args.get(
+        user_column = args.get("user_column", USER_COLUMN)
+        assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
 
         question = df[user_column].iloc[-1]
-        resp = llm([HumanMessage(question)], stop=[
+        resp = llm([HumanMessage(question)], stop=["\nObservation:", "\n\tObservation:"])
 
         return pd.DataFrame([resp.content], columns=[assistant_column])
 
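For readers skimming the hunk above: the restored parsing-error handler salvages whatever text sits between the last pair of backticks in the failed agent output and rewraps it in the conversational-ReAct format the agent expects. A self-contained sketch of that salvage step (the sample error string is invented for illustration):

```python
# Sketch of the backtick-salvage logic from the parsing-error handler above;
# raw_output is a made-up example of a malformed agent response.
raw_output = "Could not parse LLM output: `The capital of France is Paris.`"

parts = raw_output.split("`")
if len(parts) >= 2:
    # The text between the last pair of backticks is the usable answer.
    salvaged = parts[-2]
else:
    salvaged = raw_output

react_formatted = f"""Thought: Do I need to use a tool? No
AI: {salvaged}"""
print(react_formatted)
```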
@@ -162,25 +165,22 @@ AI: {response}'''
         model_kwargs = get_chat_model_params({**args, **pred_args})
         llm = create_chat_model({**args, **pred_args})
 
-        tools = setup_tools(llm,
-                            model_kwargs,
-                            pred_args,
-                            self.default_agent_tools)
+        tools = setup_tools(llm, model_kwargs, pred_args, self.default_agent_tools)
 
         # Prefer prediction prompt template over original if provided.
-        prompt_template = pred_args.get(
-        if
-            prompt_template +=
+        prompt_template = pred_args.get("prompt_template", args["prompt_template"])
+        if "context" in pred_args:
+            prompt_template += "\n\n" + "Useful information:\n" + pred_args["context"] + "\n"
 
         # Set up memory.
-        memory = ConversationSummaryBufferMemory(
-
-
+        memory = ConversationSummaryBufferMemory(
+            llm=llm, max_token_limit=model_kwargs.get("max_tokens", DEFAULT_MAX_TOKENS), memory_key="chat_history"
+        )
         memory.chat_memory.messages.insert(0, SystemMessage(content=prompt_template))
         # User - Assistant conversation. All except the last message.
-        user_column = args.get(
-        assistant_column = args.get(
-        for row in df[:-1].to_dict(
+        user_column = args.get("user_column", USER_COLUMN)
+        assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
+        for row in df[:-1].to_dict("records"):
             question = row[user_column]
             answer = row[assistant_column]
             if question:
@@ -188,45 +188,47 @@ AI: {response}'''
             if answer:
                 memory.chat_memory.add_ai_message(answer)
 
-        agent_type = args.get(
+        agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
         agent_executor = initialize_agent(
             tools,
             llm,
             agent=agent_type,
             # Use custom output parser to handle flaky LLMs that don't ALWAYS conform to output format.
-            agent_kwargs={
+            agent_kwargs={"output_parser": SafeOutputParser()},
             # Calls the agent’s LLM Chain one final time to generate a final answer based on the previous steps
-            early_stopping_method=
+            early_stopping_method="generate",
             handle_parsing_errors=self._handle_parsing_errors,
             # Timeout per agent invocation.
-            max_execution_time=pred_args.get(
-
+            max_execution_time=pred_args.get(
+                "timeout_seconds", args.get("timeout_seconds", DEFAULT_AGENT_TIMEOUT_SECONDS)
+            ),
+            max_iterations=pred_args.get("max_iterations", args.get("max_iterations", DEFAULT_MAX_ITERATIONS)),
             memory=memory,
-            verbose=pred_args.get(
+            verbose=pred_args.get("verbose", args.get("verbose", True)),
         )
         return agent_executor
 
     def run_agent(self, df: pd.DataFrame, agent: AgentExecutor, args: Dict, pred_args: Dict) -> pd.DataFrame:
         # Prefer prediction time prompt template, if available.
-        base_template = pred_args.get(
+        base_template = pred_args.get("prompt_template", args["prompt_template"])
 
         input_variables = []
         matches = list(re.finditer("{{(.*?)}}", base_template))
 
         for m in matches:
-            input_variables.append(m[0].replace(
+            input_variables.append(m[0].replace("{", "").replace("}", ""))
         empty_prompt_ids = np.where(df[input_variables].isna().all(axis=1).values)[0]
 
-        base_template = base_template.replace(
+        base_template = base_template.replace("{{", "{").replace("}}", "}")
         prompts = []
 
-        user_column = args.get(
+        user_column = args.get("user_column", USER_COLUMN)
         for i, row in df.iterrows():
             if i not in empty_prompt_ids:
                 prompt = PromptTemplate(input_variables=input_variables, template=base_template)
                 kwargs = {}
                 for col in input_variables:
-                    kwargs[col] = row[col] if row[col] is not None else
+                    kwargs[col] = row[col] if row[col] is not None else "" # add empty quote if data is missing
                 prompts.append(prompt.format(**kwargs))
             elif row.get(user_column):
                 # Just add prompt
@@ -234,32 +236,37 @@ AI: {response}'''
 
         def _invoke_agent_executor_with_prompt(agent_executor, prompt):
             if not prompt:
-                return
+                return ""
             try:
                 answer = agent_executor.invoke(prompt)
             except Exception as e:
                 answer = str(e)
                 if not answer.startswith("Could not parse LLM output: `"):
                     raise e
-                answer = {
+                answer = {"output": answer.removeprefix("Could not parse LLM output: `").removesuffix("`")}
 
-            if
+            if "output" not in answer:
                 # This should never happen unless Langchain changes invoke output format, but just in case.
                 return agent_executor.run(prompt)
-            return answer[
+            return answer["output"]
 
         completions = []
         # max_workers defaults to number of processors on the machine multiplied by 5.
         # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
-        max_workers = args.get(
-        agent_timeout_seconds = args.get(
+        max_workers = args.get("max_workers", None)
+        agent_timeout_seconds = args.get("timeout", DEFAULT_AGENT_TIMEOUT_SECONDS)
         executor = ContextThreadPoolExecutor(max_workers=max_workers)
         futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, prompt) for prompt in prompts]
         try:
             for future in as_completed(futures, timeout=agent_timeout_seconds):
                 completions.append(future.result())
         except TimeoutError:
-            completions.append(
+            completions.append(
+                f"I'm sorry! I couldn't generate a response within the allotted time ({agent_timeout_seconds} seconds). "
+                "If you need more time for processing, you can adjust the timeout settings. "
+                "Please refer to the documentation for instructions on how to change the timeout value. "
+                "Feel free to try your request again."
+            )
        # Can't use ThreadPoolExecutor as context manager since we need wait=False.
         executor.shutdown(wait=False)
 
@@ -267,13 +274,13 @@ AI: {response}'''
         for i in sorted(empty_prompt_ids)[:-1]:
             completions.insert(i, None)
 
-        pred_df = pd.DataFrame(completions, columns=[args[
+        pred_df = pd.DataFrame(completions, columns=[args["target"]])
 
         return pred_df
 
     def describe(self, attribute: Optional[str] = None) -> pd.DataFrame:
-        tables = [
-        return pd.DataFrame(tables, columns=[
+        tables = ["info"]
+        return pd.DataFrame(tables, columns=["tables"])
 
     def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
-        raise NotImplementedError(
+        raise NotImplementedError("Fine-tuning is not supported for LangChain models")
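The reformatted `run_agent` code above fans prompts out to a thread pool, collects results with `as_completed` under a timeout, appends a fallback message when the deadline passes, and then shuts the pool down without waiting for stragglers. Below is a standalone sketch of that pattern using the standard-library executor; MindsDB's `ContextThreadPoolExecutor` is swapped for `ThreadPoolExecutor` here, and the slow task and two-second timeout are invented for illustration.

```python
# Standalone sketch of the timeout pattern used in run_agent above.
# ThreadPoolExecutor stands in for mindsdb's ContextThreadPoolExecutor;
# slow_agent_call and the 2-second timeout are illustrative only.
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed


def slow_agent_call(prompt: str) -> str:
    time.sleep(5)  # pretend this is a long-running agent invocation
    return f"answer to: {prompt}"


prompts = ["question 1", "question 2"]
timeout_seconds = 2

completions = []
executor = ThreadPoolExecutor(max_workers=None)
futures = [executor.submit(slow_agent_call, p) for p in prompts]
try:
    for future in as_completed(futures, timeout=timeout_seconds):
        completions.append(future.result())
except TimeoutError:
    # Same fallback idea as the handler: report the timeout instead of raising.
    completions.append(
        f"I'm sorry! I couldn't generate a response within {timeout_seconds} seconds."
    )
# The executor is not used as a context manager because shutdown must not block
# on still-running calls (wait=False), mirroring the handler code above.
executor.shutdown(wait=False)
print(completions)
```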

mindsdb/integrations/handlers/lightwood_handler/requirements.txt

@@ -1,4 +1,4 @@
-lightwood>=25.5.
-lightwood[extra]>=25.5.
-lightwood[xai]>=25.5.
-type_infer==0.0.
+lightwood>=25.7.5.1
+lightwood[extra]>=25.7.5.1
+lightwood[xai]>=25.7.5.1
+type_infer==0.0.23

mindsdb/integrations/handlers/litellm_handler/litellm_handler.py

@@ -87,6 +87,7 @@ class LiteLLMHandler(BaseMLEngine):
 
         # check engine_storage for api_key
         input_args.update({k: v for k, v in ml_engine_args.items()})
+        input_args["target"] = target
 
         # validate args
         export_args = CompletionParameters(**input_args).model_dump()
@@ -104,6 +105,8 @@ class LiteLLMHandler(BaseMLEngine):
         # validate args
         args = CompletionParameters(**input_args).model_dump()
 
+        target = args.pop("target")
+
         # build messages
         self._build_messages(args, df)
 
@@ -113,12 +116,12 @@ class LiteLLMHandler(BaseMLEngine):
         if len(args["messages"]) > 1:
             # if more than one message, use batch completion
             responses = batch_completion(**args)
-            return pd.DataFrame({
+            return pd.DataFrame({target: [response.choices[0].message.content for response in responses]})
 
         # run completion
         response = completion(**args)
 
-        return pd.DataFrame({
+        return pd.DataFrame({target: [response.choices[0].message.content]})
 
     @staticmethod
     def _prompt_to_messages(prompt: str, **kwargs) -> List[Dict]:

mindsdb/integrations/handlers/litellm_handler/settings.py

@@ -31,7 +31,8 @@ class CompletionParameters(BaseModel):
     # set api_base, api_version, api_key
     base_url: Optional[str] = None # Base URL of the API.
     api_version: Optional[str] = None # Version of the API to be used.
-    api_key: str # API key for authentication.
+    api_key: Optional[str] = None # API key for authentication.
+    target: Optional[str] = None # the name of output column
 
     class Config:
         extra = Extra.forbid
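The litellm changes thread the model's target column name through the validated argument dict: `create()` injects it before `CompletionParameters` validation, and `predict()` pops it back out to label the output DataFrame. A toy illustration of that pop-and-label step; the argument dict and completion texts are fabricated, and only the dict/DataFrame handling mirrors the diff.

```python
# Toy illustration of the target-column handling added to predict():
# `args` mimics the output of CompletionParameters(**input_args).model_dump(),
# and the completion texts are made up for the example.
import pandas as pd

args = {
    "model": "gpt-4o",      # placeholder model name
    "target": "answer",     # injected by create(), as in the diff
    "messages": [],
}

target = args.pop("target")  # remove it so only litellm kwargs remain in args

fake_completions = ["first reply", "second reply"]  # stand-in for API responses
result = pd.DataFrame({target: fake_completions})
print(result)
#          answer
# 0   first reply
# 1  second reply
```

Making `api_key` optional in `CompletionParameters` (last hunk above) fits the same flow: the key can now come from engine storage rather than being required in every prediction call.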