MindsDB 25.7.4.0__py3-none-any.whl → 25.8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/executor/command_executor.py +9 -15
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +2 -1
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -2
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/salesforce_handler/constants.py +9 -2
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +83 -45
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +1 -3
- mindsdb/interfaces/database/projects.py +111 -7
- mindsdb/interfaces/knowledge_base/controller.py +7 -1
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +14 -15
- mindsdb/utilities/config.py +2 -0
- mindsdb/utilities/fs.py +54 -17
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +278 -263
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +49 -48
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0

mindsdb/interfaces/agents/agents_controller.py
CHANGED

@@ -145,11 +145,60 @@ class AgentsController:
 
         return all_agents.all()
 
+    def _create_default_sql_skill(
+        self,
+        name,
+        project_name,
+        include_tables: List[str] = None,
+        include_knowledge_bases: List[str] = None,
+    ):
+        # Create a default SQL skill
+        skill_name = f"{name}_sql_skill"
+        skill_params = {
+            "type": "sql",
+            "description": f"Auto-generated SQL skill for agent {name}",
+        }
+
+        # Add restrictions provided
+        if include_tables:
+            skill_params["include_tables"] = include_tables
+        if include_knowledge_bases:
+            skill_params["include_knowledge_bases"] = include_knowledge_bases
+
+        try:
+            # Check if skill already exists
+            existing_skill = self.skills_controller.get_skill(skill_name, project_name)
+            if existing_skill is None:
+                # Create the skill
+                skill_type = skill_params.pop("type")
+                self.skills_controller.add_skill(
+                    name=skill_name, project_name=project_name, type=skill_type, params=skill_params
+                )
+            else:
+                # Update the skill if parameters have changed
+                params_changed = False
+
+                # Check if skill parameters need to be updated
+                for param_key, param_value in skill_params.items():
+                    if existing_skill.params.get(param_key) != param_value:
+                        existing_skill.params[param_key] = param_value
+                        params_changed = True
+
+                # Update the skill if needed
+                if params_changed:
+                    flag_modified(existing_skill, "params")
+                    db.session.commit()
+
+        except Exception as e:
+            raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
+
+        return skill_name
+
     def add_agent(
         self,
         name: str,
         project_name: str = None,
-        model_name: str = None,
+        model_name: Union[str, dict] = None,
         skills: List[Union[str, dict]] = None,
         provider: str = None,
         params: Dict[str, Any] = None,

@@ -256,46 +305,13 @@ class AgentsController:
 
         # Auto-create SQL skill if no skills are provided but include_tables or include_knowledge_bases params are provided
         if not skills and (include_tables or include_knowledge_bases):
-            # Create a default SQL skill
-            skill_name = f"{name}_sql_skill"
-            skill_params = {
-                "type": "sql",
-                "description": f"Auto-generated SQL skill for agent {name}",
-            }
-
-            # Add restrictions provided
-            if include_tables:
-                skill_params["include_tables"] = include_tables
-            if include_knowledge_bases:
-                skill_params["include_knowledge_bases"] = include_knowledge_bases
-
-            try:
-                # Check if skill already exists
-                existing_skill = self.skills_controller.get_skill(skill_name, project_name)
-                if existing_skill is None:
-                    # Create the skill
-                    skill_type = skill_params.pop("type")
-                    self.skills_controller.add_skill(
-                        name=skill_name, project_name=project_name, type=skill_type, params=skill_params
-                    )
-                else:
-                    # Update the skill if parameters have changed
-                    params_changed = False
-
-                    # Check if skill parameters need to be updated
-                    for param_key, param_value in skill_params.items():
-                        if existing_skill.params.get(param_key) != param_value:
-                            existing_skill.params[param_key] = param_value
-                            params_changed = True
-
-                    # Update the skill if needed
-                    if params_changed:
-                        flag_modified(existing_skill, "params")
-                        db.session.commit()
-
-                skills = [skill_name]
-            except Exception as e:
-                raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
+            skill = self._create_default_sql_skill(
+                name,
+                project_name,
+                include_tables=include_tables,
+                include_knowledge_bases=include_knowledge_bases,
+            )
+            skills = [skill]
 
         agent = db.Agents(
             name=name,

@@ -351,7 +367,7 @@
         agent_name: str,
         project_name: str = default_project,
         name: str = None,
-        model_name: str = None,
+        model_name: Union[str, dict] = None,
         skills_to_add: List[Union[str, dict]] = None,
         skills_to_remove: List[str] = None,
         skills_to_rewrite: List[Union[str, dict]] = None,

@@ -365,7 +381,7 @@
             agent_name (str): The name of the new agent, or existing agent to update
             project_name (str): The containing project
             name (str): The updated name of the agent
-            model_name (str): The name of the existing ML model the agent will use
+            model_name (str | dict): The name of the existing ML model the agent will use
             skills_to_add (List[Union[str, dict]]): List of skill names to add to the agent, or list of dicts
                 with one of keys is "name", and other is additional parameters for relationship agent<>skill
             skills_to_remove (List[str]): List of skill names to remove from the agent

@@ -394,6 +410,8 @@
         existing_agent = self.get_agent(agent_name, project_name=project_name)
         if existing_agent is None:
             raise EntityNotExistsError(f"Agent with name not found: {agent_name}")
+        existing_params = existing_agent.params or {}
+
         is_demo = (existing_agent.params or {}).get("is_demo", False)
         if is_demo and (
             (name is not None and name != agent_name)

@@ -413,12 +431,34 @@
            existing_agent.name = name
 
        if model_name or provider:
+            if isinstance(model_name, dict):
+                # move into params
+                existing_params["model"] = model_name
+                model_name = None
+
            # check model and provider
            model, provider = self.check_model_provider(model_name, provider)
            # Update model and provider
            existing_agent.model_name = model_name
            existing_agent.provider = provider
 
+        if "data" in params:
+            if len(skills_to_add) > 0 or len(skills_to_remove) > 0:
+                raise ValueError(
+                    "'data' parameter cannot be used with 'skills_to_remove' or 'skills_to_add' parameters"
+                )
+
+            include_knowledge_bases = params["data"].get("knowledge_bases")
+            include_tables = params["data"].get("tables")
+
+            skill = self._create_default_sql_skill(
+                agent_name,
+                project_name,
+                include_tables=include_tables,
+                include_knowledge_bases=include_knowledge_bases,
+            )
+            skills_to_rewrite = [{"name": skill}]
+
        # check that all skills exist
        skill_name_to_record_map = {}
        for skill_meta in skills_to_add + skills_to_remove + skills_to_rewrite:

@@ -496,8 +536,6 @@
                 db.session.add(association)
 
         if params is not None:
-            existing_params = existing_agent.params or {}
-
             if params.get("data", {}).get("tables"):
                 new_table_entries = set(params["data"]["tables"]) - set(
                     existing_params.get("data", {}).get("tables", [])
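
Note: a minimal sketch of how the auto-created SQL skill above behaves. AgentsController and _create_default_sql_skill are real names from this diff; the agent name, project, tables, and knowledge base are hypothetical, and how include_tables reaches add_agent (direct kwarg vs. params["data"]) is not visible in these hunks.

    from mindsdb.interfaces.agents.agents_controller import AgentsController

    controller = AgentsController()

    # Creates (or updates) a skill named "<agent>_sql_skill" with the given
    # restrictions stored in its params, then returns the skill name.
    skill_name = controller._create_default_sql_skill(
        "sales_agent",                      # hypothetical agent name
        "mindsdb",                          # hypothetical project
        include_tables=["my_db.orders"],
        include_knowledge_bases=["my_kb"],
    )
    # skill_name == "sales_agent_sql_skill"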

mindsdb/interfaces/agents/langchain_agent.py
CHANGED

@@ -11,7 +11,7 @@ import pandas as pd
 from langchain.agents import AgentExecutor
 from langchain.agents.initialize import initialize_agent
 from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
-from langchain_community.chat_models import ChatAnyscale, ChatLiteLLM, ChatOllama
+from langchain_community.chat_models import ChatLiteLLM, ChatOllama
 from langchain_writer import ChatWriter
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_core.agents import AgentAction, AgentStep

@@ -165,8 +165,6 @@ def create_chat_model(args: Dict):
        except NotImplementedError:
            chat_open_ai.tiktoken_model_name = DEFAULT_TIKTOKEN_MODEL_NAME
        return chat_open_ai
-    if args["provider"] == "anyscale":
-        return ChatAnyscale(**model_kwargs)
    if args["provider"] == "litellm":
        return ChatLiteLLM(**model_kwargs)
    if args["provider"] == "ollama":

mindsdb/interfaces/database/projects.py
CHANGED

@@ -3,11 +3,12 @@ from copy import deepcopy
 from typing import List, Optional
 from collections import OrderedDict
 
+import pandas as pd
 import sqlalchemy as sa
 import numpy as np
 
 from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier
+from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier, BinaryOperation
 from mindsdb_sql_parser import parse_sql
 
 from mindsdb.interfaces.storage import db

@@ -109,7 +110,19 @@ class Project:
         """
         ViewController().delete(name, project_name=self.name, strict_case=strict_case)
 
-    def create_view(self, name: str, query: str):
+    def create_view(self, name: str, query: str, session):
+        ast_query = parse_sql(query)
+
+        if isinstance(ast_query, Select):
+            # check create view sql
+            ast_query.limit = Constant(1)
+
+            query_context_controller.set_context(query_context_controller.IGNORE_CONTEXT)
+            try:
+                SQLQuery(ast_query, session=session, database=self.name)
+            finally:
+                query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
+
         ViewController().add(name, query=query, project_name=self.name)
 
     def update_view(self, name: str, query: str, strict_case: bool = False):
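
Note: the reworked create_view above dry-runs the view's SELECT with LIMIT 1 before saving it, so a broken definition fails at CREATE VIEW time rather than at first use. A minimal sketch of the same pattern, assuming only the mindsdb_sql_parser names used in the diff (the run callback stands in for SQLQuery):

    from mindsdb_sql_parser import parse_sql
    from mindsdb_sql_parser.ast import Select, Constant

    def validate_view_sql(query: str, run) -> None:
        ast_query = parse_sql(query)
        if isinstance(ast_query, Select):
            ast_query.limit = Constant(1)  # fetch at most one row: a cheap dry run
            run(ast_query)                 # any execution error surfaces here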

@@ -124,21 +137,112 @@ class Project:
         view_meta["query_ast"] = parse_sql(view_meta["query"])
         return view_meta
 
-    def query_view(self, query: Select, session):
+    @staticmethod
+    def combine_view_select(view_query: Select, query: Select) -> Select:
+        """
+        Create a combined query from view's query and outer query.
+        """
+
+        # apply optimizations
+        if query.where is not None:
+            # Get conditions that can be duplicated into view's query
+            # It has to be simple condition with identifier and constant
+            # Also it shouldn't be under the OR condition
+
+            def get_conditions_to_move(node):
+                if not isinstance(node, BinaryOperation):
+                    return []
+                op = node.op.upper()
+                if op == "AND":
+                    conditions = []
+                    conditions.extend(get_conditions_to_move(node.args[0]))
+                    conditions.extend(get_conditions_to_move(node.args[1]))
+                    return conditions
+
+                if op == "OR":
+                    return []
+                if isinstance(node.args[0], (Identifier, Constant)) and isinstance(
+                    node.args[1], (Identifier, Constant)
+                ):
+                    return [node]
+
+            conditions = get_conditions_to_move(query.where)
+
+            if conditions:
+                # analyse targets
+                # if target element has alias
+                #   if element is not identifier or the name is not equal to alias:
+                #     add alias to black list
+                # white list:
+                #   all targets that are identifiers with no alias or equal to its alias
+                # condition can be moved if
+                #   column is not in black list AND (query has star(*) OR column in white list)
+
+                has_star = False
+                white_list, black_list = [], []
+                for target in view_query.targets:
+                    if isinstance(target, Star):
+                        has_star = True
+                    if isinstance(target, Identifier):
+                        name = target.parts[-1].lower()
+                        if target.alias is None or target.alias.parts[-1].lower() == name:
+                            white_list.append(name)
+                        elif target.alias is not None:
+                            black_list.append(target.alias.parts[-1].lower())
+
+                view_where = view_query.where
+                for condition in conditions:
+                    arg1, arg2 = condition.args
+
+                    if isinstance(arg1, Identifier):
+                        name = arg1.parts[-1].lower()
+                        if name in black_list or not (has_star or name in white_list):
+                            continue
+                    if isinstance(arg2, Identifier):
+                        name = arg2.parts[-1].lower()
+                        if name in black_list or not (has_star or name in white_list):
+                            continue
+
+                    # condition can be moved into view
+                    condition2 = BinaryOperation(condition.op, [arg1, arg2])
+                    if view_where is None:
+                        view_where = condition2
+                    else:
+                        view_where = BinaryOperation("AND", args=[view_where, condition2])
+
+                    # disable outer condition
+                    condition.op = "="
+                    condition.args = [Constant(0), Constant(0)]
+
+                view_query.where = view_where
+
+        # combine outer query with view's query
+        view_query.parentheses = True
+        query.from_table = view_query
+        return query
+
+    def query_view(self, query: Select, session) -> pd.DataFrame:
         view_meta = self.get_view_meta(query)
 
         query_context_controller.set_context("view", view_meta["id"])
-
+        query_applied = False
         try:
-            sqlquery = SQLQuery(view_meta["query_ast"], session=session)
+            view_query = view_meta["query_ast"]
+            if isinstance(view_query, Select):
+                view_query = self.combine_view_select(view_query, query)
+                query_applied = True
+
+            sqlquery = SQLQuery(view_query, session=session)
             df = sqlquery.fetched_data.to_df()
         finally:
             query_context_controller.release_context("view", view_meta["id"])
 
         # remove duplicated columns
         df = df.loc[:, ~df.columns.duplicated()]
-
-        return query_df(df, query, session=session)
+        if query_applied:
+            return df
+        else:
+            return query_df(df, query, session=session)
 
     @staticmethod
     def _get_model_data(predictor_record, integraion_record, with_secrets: bool = True):
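
Note: a worked example of the condition pushdown that combine_view_select implements, assuming the mindsdb_sql_parser API used in the diff; the SQL strings in the comments are approximate renderings of the resulting AST.

    from mindsdb_sql_parser import parse_sql
    from mindsdb.interfaces.database.projects import Project

    view_query = parse_sql("SELECT a, b FROM t")
    outer = parse_sql("SELECT * FROM v WHERE a = 1 AND (b = 2 OR c = 3)")

    combined = Project.combine_view_select(view_query, outer)
    # "a = 1" is a simple identifier/constant condition not under an OR, so it
    # is copied into the view's WHERE, and its outer occurrence is neutralized
    # to the tautology "0 = 0". Roughly:
    #   SELECT * FROM (SELECT a, b FROM t WHERE a = 1) WHERE 0 = 0 AND (b = 2 OR c = 3)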

mindsdb/interfaces/knowledge_base/controller.py
CHANGED

@@ -1139,8 +1139,14 @@ class KnowledgeBaseController:
         else:
             vector_db_name, vector_table_name = storage.parts
 
+        data_node = self.session.datahub.get(vector_db_name)
+        if data_node:
+            vector_store_handler = data_node.integration_handler
+        else:
+            raise ValueError(
+                f"Unable to find database named {vector_db_name}, please make sure {vector_db_name} is defined"
+            )
         # create table in vectordb before creating KB
-        vector_store_handler = self.session.datahub.get(vector_db_name).integration_handler
         vector_store_handler.create_table(vector_table_name)
         if keyword_search_enabled:
             vector_store_handler.add_full_text_index(vector_table_name, TableField.CONTENT.value)

mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py
CHANGED

@@ -4,8 +4,7 @@ import asyncio
 from typing import List, Dict, Optional, Any
 
 import pandas as pd
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_core.documents import Document as LangchainDocument
+from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 
 from mindsdb.integrations.utilities.rag.splitters.file_splitter import (
     FileSplitter,

@@ -22,7 +21,6 @@ from mindsdb.interfaces.knowledge_base.preprocessing.models import (
 )
 from mindsdb.utilities import log
 
-
 logger = log.getLogger(__name__)
 
 _DEFAULT_CONTENT_COLUMN_NAME = "content"

@@ -49,11 +47,10 @@ class DocumentPreprocessor:
         if self.splitter is None:
             raise ValueError("Splitter not configured")
 
-
-        langchain_doc = LangchainDocument(page_content=doc.content, metadata=doc.metadata or {})
+        metadata = doc.metadata or {}
         # Split and convert back to our Document type
-
-        return [Document(content=
+        split_texts = self.splitter.split_text(doc.content)
+        return [Document(content=text, metadata=metadata) for text in split_texts]
 
     def _get_source(self) -> str:
         """Get the source identifier for this preprocessor"""

@@ -266,16 +263,15 @@ Please give a short succinct context to situate this chunk within the overall document
 
 
 class TextChunkingPreprocessor(DocumentPreprocessor):
-    """Default text chunking preprocessor using RecursiveCharacterTextSplitter"""
+    """Default text chunking preprocessor using TextSplitter"""
 
     def __init__(self, config: Optional[TextChunkingConfig] = None):
         """Initialize with text chunking configuration"""
         super().__init__()
         self.config = config or TextChunkingConfig()
-        self.splitter = RecursiveCharacterTextSplitter(
+        self.splitter = TextSplitter(
             chunk_size=self.config.chunk_size,
             chunk_overlap=self.config.chunk_overlap,
-            length_function=self.config.length_function,
             separators=self.config.separators,
         )
 

mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py
ADDED

@@ -0,0 +1,73 @@
+from typing import List
+
+
+class TextSplitter:
+    def __init__(
+        self,
+        chunk_size: int = 1000,
+        chunk_overlap: int = 200,
+        separators: List[str] = None,
+        k_range: float = 0.5,
+        k_ratio: float = 1,
+    ):
+        """
+        Split text into chunks. The logic:
+        - Get a piece of text with chunk_size and try to find the separator at the end of the piece.
+        - The allowed range to find the separator is defined by k_range and k_ratio using formula:
+            k_range * chunk_size / (num * k_ratio + 1)
+            num - is number of a separator from the list
+        - if the separator is not in the range: switch to the next separator
+        - if the found separator is in the middle of the sentence, use overlapping:
+            - the found text is the current chunk
+            - repeat the search with less strict k_range and k_ratio
+            - the found text will be the beginning of the next chunk
+
+        :param chunk_size: size of the chunk, which must not be exceeded
+        :param separators: list of separators in order of priority
+        :param k_range: defines the range to look for the separator
+        :param k_ratio: defines how much to shrink the range for the next separator
+        """
+        if separators is None:
+            separators = ["\n\n", "\n", ". ", " ", ""]
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.separators = separators
+        self.k_range = k_range
+        self.k_ratio = k_ratio
+
+    def split_text(self, text: str) -> List[str]:
+        chunks = []
+
+        while True:
+            if len(text) < self.chunk_size:
+                chunks.append(text)
+                break
+
+            sep, chunk, shift = self.get_next_chunk(text, self.k_range, self.k_ratio)
+            chunks.append(chunk)
+
+            text = text[shift:]
+        return chunks
+
+    def get_next_chunk(self, text: str, k_range: float, k_ratio: float):
+        # returns chunk with separator and shift for the next search iteration
+
+        chunk = text[: self.chunk_size]
+        # positions = []
+        for i, sep in enumerate(self.separators):
+            pos = chunk.rfind(sep)
+
+            vpos = self.chunk_size - pos
+            if vpos < k_range * self.chunk_size / (i * k_ratio + 1):
+                shift = len(sep) + pos
+                if sep.strip(" ") == "":
+                    # overlapping
+                    sep2, _, shift2 = self.get_next_chunk(text, k_range * 1.5, 0)
+                    if sep2.strip(" ") != "":
+                        # use shift of previous separator
+                        if shift - shift2 < self.chunk_overlap:
+                            shift = shift2
+
+                return sep, chunk[:pos], shift
+
+        raise RuntimeError("Cannot split text")
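
Note: a quick usage sketch for the new TextSplitter; the import path is taken from this diff, while the sample text and sizes are arbitrary.

    from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter

    splitter = TextSplitter(chunk_size=100, chunk_overlap=20)
    text = "MindsDB splits long documents into chunks. " * 10

    for chunk in splitter.split_text(text):
        # chunks break at the highest-priority separator found near the end
        # of each chunk_size window, so every chunk stays within chunk_size
        print(len(chunk), repr(chunk[:40]))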

mindsdb/interfaces/query_context/context_controller.py
CHANGED

@@ -45,7 +45,7 @@ class RunningQuery:
             for df in dn.query_stream(query2, fetch_size=self.batch_size):
                 max_track_value = self.get_max_track_value(df)
                 yield df
-                self.set_progress(
+                self.set_progress(max_track_value=max_track_value)
 
         else:
             while True:

@@ -59,7 +59,7 @@ class RunningQuery:
 
                 max_track_value = self.get_max_track_value(df)
                 yield df
-                self.set_progress(
+                self.set_progress(max_track_value=max_track_value)
 
     def get_partition_query(self, step_num: int, query: Select, stream=False) -> Select:
         """

@@ -178,24 +178,23 @@ class RunningQuery:
             # stream mode
             return None
 
-    def set_progress(self,
+    def set_progress(self, processed_rows: int = None, max_track_value: int = None):
         """
         Store progress of the query, it is called after processing of batch
         """
 
-        if
-
-
-        self.record.processed_rows = self.record.processed_rows + len(df)
-
-        cur_value = self.record.context.get("track_value")
-        new_value = max_track_value
-        if new_value is not None:
-            if cur_value is None or new_value > cur_value:
-                self.record.context["track_value"] = new_value
-                flag_modified(self.record, "context")
+        if processed_rows is not None and processed_rows > 0:
+            self.record.processed_rows = self.record.processed_rows + processed_rows
+            db.session.commit()
 
-
+        if max_track_value is not None:
+            cur_value = self.record.context.get("track_value")
+            new_value = max_track_value
+            if new_value is not None:
+                if cur_value is None or new_value > cur_value:
+                    self.record.context["track_value"] = new_value
+                    flag_modified(self.record, "context")
+                    db.session.commit()
 
     def on_error(self, error: Exception, step_num: int, steps_data: dict):
         """

mindsdb/utilities/config.py
CHANGED

@@ -599,6 +599,7 @@ class Config:
             ml_task_queue_consumer=None,
             agent=None,
             project=None,
+            update_gui=False,
         )
         return
 

@@ -635,6 +636,7 @@ class Config:
             help="MindsDB agent name to connect to",
         )
         parser.add_argument("--project-name", type=str, default=None, help="MindsDB project name")
+        parser.add_argument("--update-gui", action="store_true", default=False, help="Update GUI and exit")
 
         self._cmd_args = parser.parse_args()
 
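
Note: the new --update-gui flag (help text: "Update GUI and exit") suggests the static GUI bundle can now be refreshed without starting the server, presumably via an invocation like python -m mindsdb --update-gui; the handling itself is not shown here, though the __main__.py and fs.py changes in the file list above are consistent with it.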