PyPI - MindsDB - Versions diffs - 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl - Mend

MindsDB 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (95) hide show

mindsdb/interfaces/chatbot/memory.py CHANGED Viewed

@@ -1,9 +1,9 @@
+from typing import Union
 from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, OrderBy
 from mindsdb.interfaces.storage import db
 from .types import ChatBotMessage
@@ -60,7 +60,7 @@ class BaseMemory:
         # If the chat_id is a tuple, convert it to a string when storing the message in the database.
         self._add_to_history(
-            str(chat_id) if isinstance(chat_id, tuple) else chat_id,
+            chat_id,
             chat_message,
             table_name=table_name
         )
@@ -74,7 +74,7 @@ class BaseMemory:
         else:
             history = self._get_chat_history(
-                str(chat_id) if isinstance(chat_id, tuple) else chat_id,
+                chat_id,
                 table_name
             )
             self._cache[key] = history
@@ -108,18 +108,44 @@ class HandlerMemory(BaseMemory):
         time_col = t_params['time_col']
         chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']]
-        ast_query = Select(
-            targets=[Identifier(text_col),
-                     Identifier(username_col),
-                     Identifier(time_col)],
-            from_table=Identifier(t_params['name']),
-            where=[BinaryOperation(
+        chat_id = chat_id if isinstance(chat_id, tuple) else (chat_id,)
+        # Add a WHERE clause for each chat_id column.
+        where_conditions = [
+            BinaryOperation(
                 op='=',
                 args=[
                     Identifier(chat_id_col),
                     Constant(chat_id[idx])
                 ]
-            ) for idx, chat_id_col in enumerate(chat_id_cols)],
+            ) for idx, chat_id_col in enumerate(chat_id_cols)
+        ]
+        # Add a WHERE clause to ignore holding messages from the bot.
+        from .chatbot_task import HOLDING_MESSAGE
+        where_conditions.append(
+            BinaryOperation(
+                op='!=',
+                args=[
+                    Identifier(text_col),
+                    Constant(HOLDING_MESSAGE)
+                ]
+            )
+        )
+        # Convert the WHERE conditions to a BinaryOperation object.
+        where_conditions_binary_operation = None
+        for condition in where_conditions:
+            if where_conditions_binary_operation is None:
+                where_conditions_binary_operation = condition
+            else:
+                where_conditions_binary_operation = BinaryOperation('and', args=[where_conditions_binary_operation, condition])
+        ast_query = Select(
+            targets=[Identifier(text_col),
+                     Identifier(username_col),
+                     Identifier(time_col)],
+            from_table=Identifier(t_params['name']),
+            where=where_conditions_binary_operation,
             order_by=[OrderBy(Identifier(time_col))],
             limit=Constant(self.MAX_DEPTH),
         )
@@ -151,9 +177,28 @@ class DBMemory(BaseMemory):
     uses mindsdb database to store messages
     '''
+    def _generate_chat_id_for_db(self, chat_id: Union[str, tuple], table_name: str = None) -> str:
+        """
+        Generate an ID for the chat to store in the database.
+        The ID is a string that includes the components of the chat ID and the table name (if provided) separated by underscores.
+        Args:
+            chat_id (str | tuple): The ID of the chat.
+            table_name (str): The name of the table the chat belongs to.
+        """
+        if isinstance(chat_id, tuple):
+            char_id_str = "_".join(str(val) for val in chat_id)
+        else:
+            char_id_str = str(chat_id)
+        if table_name:
+            chat_id_str = f"{table_name}_{char_id_str}"
+        return chat_id_str
     def _add_to_history(self, chat_id, message, table_name=None):
         chat_bot_id = self.chat_task.bot_id
-        destination = str((chat_id, table_name)) if table_name else chat_id
+        destination = self._generate_chat_id_for_db(chat_id, table_name)
         message = db.ChatBotsHistory(
             chat_bot_id=chat_bot_id,
@@ -167,7 +212,7 @@ class DBMemory(BaseMemory):
     def _get_chat_history(self, chat_id, table_name=None):
         chat_bot_id = self.chat_task.bot_id
-        destination = str((chat_id, table_name)) if table_name else chat_id
+        destination = self._generate_chat_id_for_db(chat_id, table_name)
         query = db.ChatBotsHistory.query\
             .filter(

mindsdb/interfaces/database/integrations.py CHANGED Viewed

@@ -215,6 +215,8 @@ class IntegrationController:
     def modify(self, name, data):
         self.handlers_cache.delete(name)
         integration_record = self._get_integration_record(name)
+        if isinstance(integration_record.data, dict) and integration_record.data.get('is_demo') is True:
+            raise ValueError("It is forbidden to change properties of the demo object")
         old_data = deepcopy(integration_record.data)
         for k in old_data:
             if k not in data:
@@ -234,9 +236,11 @@ class IntegrationController:
             handler = self.handler_modules[name]
             if getattr(handler, 'permanent', False) is True:
-                raise Exception('Unable to drop: is permanent integration')
+                raise Exception('Unable to drop permanent integration')
         integration_record = self._get_integration_record(name)
+        if isinstance(integration_record.data, dict) and integration_record.data.get('is_demo') is True:
+            raise Exception('Unable to drop demo object')
         # if this is ml engine
         engine_models = get_model_records(ml_handler_name=name, deleted_at=None)

mindsdb/interfaces/database/projects.py CHANGED Viewed

@@ -7,6 +7,7 @@ import sqlalchemy as sa
 import numpy as np
 from mindsdb_sql_parser.ast.base import ASTNode
+from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier
 from mindsdb_sql_parser import parse_sql
 from mindsdb.interfaces.storage import db
@@ -16,6 +17,9 @@ from mindsdb.interfaces.database.views import ViewController
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
 import mindsdb.utilities.profiler as profiler
+from mindsdb.api.executor.sql_query import SQLQuery
+from mindsdb.api.executor.utilities.sql import query_df
+from mindsdb.interfaces.query_context.context_controller import query_context_controller
 class Project:
@@ -24,19 +28,14 @@ class Project:
         p = Project()
         p.record = db_record
         p.name = db_record.name
-        p.company_id = db_record.company_id
+        p.company_id = ctx.company_id
         p.id = db_record.id
         return p
     def create(self, name: str):
         name = name.lower()
-        existing_record = db.Project.query.filter(
-            (sa.func.lower(db.Project.name) == name)
-            & (db.Project.company_id == ctx.company_id)
-            & (db.Project.deleted_at == sa.null())
-        ).first()
-        if existing_record is not None:
-            raise EntityExistsError('Project already exists', name)
+        company_id = ctx.company_id if ctx.company_id is not None else 0
         existing_record = db.Integration.query.filter(
             sa.func.lower(db.Integration.name) == name,
@@ -45,23 +44,28 @@ class Project:
         if existing_record is not None:
             raise EntityExistsError('Database exists with this name ', name)
+        existing_record = db.Project.query.filter(
+            (sa.func.lower(db.Project.name) == name)
+            & (db.Project.company_id == company_id)
+            & (db.Project.deleted_at == sa.null())
+        ).first()
+        if existing_record is not None:
+            raise EntityExistsError('Project already exists', name)
         record = db.Project(
             name=name,
-            company_id=ctx.company_id
+            company_id=company_id
         )
         self.record = record
         self.name = name
-        self.company_id = ctx.company_id
+        self.company_id = company_id
         db.session.add(record)
         db.session.commit()
         self.id = record.id
-    def save(self):
-        db.session.commit()
     def delete(self):
         tables = self.get_tables()
         tables = [key for key, val in tables.items() if val['type'] != 'table']
@@ -111,7 +115,7 @@ class Project:
             project_name=self.name
         )
-    def query_view(self, query: ASTNode) -> ASTNode:
+    def get_view_meta(self, query: ASTNode) -> ASTNode:
         view_name = query.from_table.parts[-1]
         view_meta = ViewController().get(
             name=view_name,
@@ -120,6 +124,30 @@ class Project:
         view_meta['query_ast'] = parse_sql(view_meta['query'])
         return view_meta
+    def query_view(self, query, session):
+        view_meta = self.get_view_meta(query)
+        query_context_controller.set_context('view', view_meta['id'])
+        try:
+            sqlquery = SQLQuery(
+                view_meta['query_ast'],
+                session=session
+            )
+            result = sqlquery.fetch(view='dataframe')
+        finally:
+            query_context_controller.release_context('view', view_meta['id'])
+        if result['success'] is False:
+            raise Exception(f"Cant execute view query: {view_meta['query_ast']}")
+        df = result['result']
+        # remove duplicated columns
+        df = df.loc[:, ~df.columns.duplicated()]
+        return query_df(df, query, session=session)
     @staticmethod
     def _get_model_data(predictor_record, integraion_record, with_secrets: bool = True):
         from mindsdb.interfaces.database.integrations import integration_controller
@@ -341,6 +369,15 @@ class Project:
                 columns = predictor_record.to_predict
                 if not isinstance(columns, list):
                     columns = [columns]
+            return columns
+        if self.get_view(table_name):
+            query = Select(targets=[Star()], from_table=Identifier(table_name), limit=Constant(1))
+            from mindsdb.api.executor.controllers.session_controller import SessionController
+            session = SessionController()
+            session.database = self.name
+            df = self.query_view(query, session)
+            return df.columns
         else:
             # is it agent?
             agent = db.Agents.query.filter_by(
@@ -360,8 +397,9 @@ class ProjectController:
         pass
     def get_list(self) -> List[Project]:
+        company_id = ctx.company_id if ctx.company_id is not None else 0
         records = db.Project.query.filter(
-            (db.Project.company_id == ctx.company_id)
+            (db.Project.company_id == company_id)
             & (db.Project.deleted_at == sa.null())
         ).order_by(db.Project.name)
@@ -371,7 +409,8 @@ class ProjectController:
         if id is not None and name is not None:
             raise ValueError("Both 'id' and 'name' is None")
-        q = db.Project.query.filter_by(company_id=ctx.company_id)
+        company_id = ctx.company_id if ctx.company_id is not None else 0
+        q = db.Project.query.filter_by(company_id=company_id)
         if id is not None:
             q = q.filter_by(id=id)

mindsdb/interfaces/database/views.py CHANGED Viewed

@@ -3,6 +3,7 @@ from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.query_context.context_controller import query_context_controller
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
+from mindsdb.interfaces.model.functions import get_project_record, get_project_records
 class ViewController:
@@ -39,11 +40,8 @@ class ViewController:
     def update(self, name, query, project_name):
         name = name.lower()
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         rec = db.session.query(db.View).filter(
             func.lower(db.View.name) == name,
             db.View.company_id == ctx.company_id,
@@ -56,11 +54,8 @@ class ViewController:
     def delete(self, name, project_name):
         name = name.lower()
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         rec = db.session.query(db.View).filter(
             func.lower(db.View.name) == name,
             db.View.company_id == ctx.company_id,
@@ -74,17 +69,12 @@ class ViewController:
         query_context_controller.drop_query_context('view', rec.id)
     def list(self, project_name):
-        query = db.session.query(db.Project).filter_by(
-            company_id=ctx.company_id,
-            deleted_at=None
-        )
-        if project_name is not None:
-            query = query.filter_by(name=project_name)
-        project_names = {
-            i.id: i.name
-            for i in query
-        }
+        project_names = {}
+        for project in get_project_records():
+            if project_name is not None and project.name != project_name:
+                continue
+            project_names[project.id] = project.name
         query = db.session.query(db.View).filter(
             db.View.company_id == ctx.company_id,
@@ -112,11 +102,8 @@ class ViewController:
         }
     def get(self, id=None, name=None, project_name=None):
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         if id is not None:
             records = db.session.query(db.View).filter_by(
                 id=id,

mindsdb/interfaces/knowledge_base/controller.py CHANGED Viewed

@@ -52,6 +52,7 @@ class KnowledgeBaseTable:
         self.session = session
         self.document_preprocessor = None
         self.document_loader = None
+        self.model_params = None
     def configure_preprocessing(self, config: Optional[dict] = None):
         """Configure preprocessing for the knowledge base table"""
@@ -488,6 +489,7 @@ class KnowledgeBaseTable:
         df_out = project_datanode.predict(
             model_name=model_rec.name,
             df=df,
+            params=self.model_params
         )
         target = model_rec.to_predict[0]
@@ -642,11 +644,13 @@ class KnowledgeBaseController:
             storage: Identifier,
             params: dict,
             preprocessing_config: Optional[dict] = None,
-            if_not_exists: bool = False,
+            if_not_exists: bool = False
     ) -> db.KnowledgeBase:
         """
         Add a new knowledge base to the database
         :param preprocessing_config: Optional preprocessing configuration to validate and store
+        :param is_sparse: Whether to use sparse vectors for embeddings
+        :param vector_size: Optional size specification for vectors, required when is_sparse=True
         """
         # Validate preprocessing config first if provided
         if preprocessing_config is not None:
@@ -654,6 +658,12 @@ class KnowledgeBaseController:
             params = params or {}
             params['preprocessing'] = preprocessing_config
+        # Check if vector_size is provided when using sparse vectors
+        is_sparse = params.get('is_sparse')
+        vector_size = params.get('vector_size')
+        if is_sparse and vector_size is None:
+            raise ValueError("vector_size is required when is_sparse=True")
         # get project id
         project = self.session.database_controller.get_project(project_name)
         project_id = project.id
@@ -693,7 +703,16 @@ class KnowledgeBaseController:
             cloud_pg_vector = os.environ.get('KB_PGVECTOR_URL')
             if cloud_pg_vector:
                 vector_table_name = name
-                vector_db_name = self._create_persistent_pgvector()
+                # Add sparse vector support for pgvector
+                vector_db_params = {}
+                # Check both explicit parameter and model configuration
+                is_sparse = is_sparse or model_record.learn_args.get('using', {}).get('sparse')
+                if is_sparse:
+                    vector_db_params['is_sparse'] = True
+                    if vector_size is not None:
+                        vector_db_params['vector_size'] = vector_size
+                vector_db_name = self._create_persistent_pgvector(vector_db_params)
             else:
                 # create chroma db with same name
                 vector_table_name = "default_collection"
@@ -705,17 +724,20 @@ class KnowledgeBaseController:
         else:
             vector_db_name, vector_table_name = storage.parts
+        # create table in vectordb before creating KB
+        self.session.datahub.get(vector_db_name).integration_handler.create_table(
+            vector_table_name
+        )
         vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
-        # create table in vectordb
-        if model_record.learn_args.get('using', {}).get('sparse') is not None:
-            self.session.datahub.get(vector_db_name).integration_handler.create_table(
-                vector_table_name, sparse=model_record.learn_args.get('using', {}).get('sparse')
-            )
-        else:
-            self.session.datahub.get(vector_db_name).integration_handler.create_table(
-                vector_table_name
-            )
+        # Store sparse vector settings in params if specified
+        if is_sparse:
+            params = params or {}
+            params['vector_config'] = {
+                'is_sparse': is_sparse
+            }
+            if vector_size is not None:
+                params['vector_config']['vector_size'] = vector_size
         kb = db.KnowledgeBase(
             name=name,
@@ -729,16 +751,15 @@ class KnowledgeBaseController:
         db.session.commit()
         return kb
-    def _create_persistent_pgvector(self):
+    def _create_persistent_pgvector(self, params=None):
         """Create default vector database for knowledge base, if not specified"""
         vector_store_name = "kb_pgvector_store"
         # check if exists
         if self.session.integration_controller.get(vector_store_name):
             return vector_store_name
-        self.session.integration_controller.add(vector_store_name, 'pgvector', {})
+        self.session.integration_controller.add(vector_store_name, 'pgvector', params or {})
         return vector_store_name
     def _create_persistent_chroma(self, kb_name, engine="chromadb"):
@@ -840,16 +861,19 @@ class KnowledgeBaseController:
         )
         return kb
-    def get_table(self, name: str, project_id: int) -> KnowledgeBaseTable:
+    def get_table(self, name: str, project_id: int, params: dict = None) -> KnowledgeBaseTable:
         """
         Returns kb table object with properly configured preprocessing
         :param name: table name
         :param project_id: project id
+        :param params: runtime parameters for KB. Keys: 'model' - parameters for embedding model
         :return: kb table object
         """
         kb = self.get(name, project_id)
         if kb is not None:
             table = KnowledgeBaseTable(kb, self.session)
+            if params:
+                table.model_params = params.get('model')
             # Always configure preprocessing - either from params or default
             if kb.params and 'preprocessing' in kb.params:

mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py CHANGED Viewed

@@ -1,15 +1,13 @@
 import os
 from typing import List, Iterator
 from langchain_core.documents import Document as LangchainDocument
-from langchain_text_splitters import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
+from langchain_text_splitters import MarkdownHeaderTextSplitter
 import pandas as pd
 from mindsdb.interfaces.file.file_controller import FileController
 from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
 from mindsdb.integrations.utilities.rag.splitters.file_splitter import (
     FileSplitter,
-    DEFAULT_CHUNK_SIZE,
-    DEFAULT_CHUNK_OVERLAP
 )
 from mindsdb.integrations.handlers.web_handler.urlcrawl_helpers import get_all_websites
 from mindsdb.interfaces.knowledge_base.preprocessing.models import Document
@@ -45,12 +43,6 @@ class DocumentLoader:
         self.file_loader_class = file_loader_class
         self.mysql_proxy = mysql_proxy
-        # Initialize text splitter for query results with default settings
-        self.query_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=DEFAULT_CHUNK_SIZE,
-            chunk_overlap=DEFAULT_CHUNK_OVERLAP
-        )
     def load_files(self, file_names: List[str]) -> Iterator[Document]:
         """Load and split documents from files"""
         for file_name in file_names:
@@ -143,8 +135,9 @@ class DocumentLoader:
         # Process each row into a Document
         for _, row in df.iterrows():
-            # Extract content and metadata
+            # Extract id, content  and metadata
             content = str(row.get('content', ''))
+            id = row.get('id', None)
             # Convert remaining columns to metadata
             metadata = {
@@ -156,21 +149,9 @@ class DocumentLoader:
             # Split content using recursive splitter
             if content:
-                doc = LangchainDocument(
-                    page_content=content,
+                yield Document(
+                    id=id,
+                    content=content,
                     metadata=metadata
                 )
-                # Use FileSplitter with default recursive splitter
-                split_docs = self.file_splitter.split_documents(
-                    [doc],
-                    default_failover=True
-                )
-                for split_doc in split_docs:
-                    metadata = doc.metadata.copy()
-                    metadata.update(split_doc.metadata or {})
-                    yield Document(
-                        content=split_doc.page_content,
-                        metadata=metadata
-                    )

mindsdb/interfaces/model/functions.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, List
 from sqlalchemy import null, func
@@ -41,9 +41,7 @@ def get_integration_record(name: str) -> db.Integration:
 @profiler.profile()
 def get_project_record(name: str) -> db.Project:
-    company_id = ctx.company_id
-    if company_id is None:
-        company_id = null()
+    company_id = ctx.company_id if ctx.company_id is not None else 0
     project_record = (
         db.session.query(db.Project)
@@ -56,6 +54,19 @@ def get_project_record(name: str) -> db.Project:
     return project_record
+@profiler.profile()
+def get_project_records() -> List[db.Project]:
+    company_id = ctx.company_id if ctx.company_id is not None else 0
+    return (
+        db.session.query(db.Project)
+        .filter(
+            (db.Project.company_id == company_id)
+            & (db.Project.deleted_at == null())
+        ).all()
+    )
 @profiler.profile()
 def get_predictor_integration(record: db.Predictor) -> db.Integration:
     integration_record = (

mindsdb/interfaces/model/model_controller.py CHANGED Viewed

@@ -7,14 +7,15 @@ from multiprocessing.pool import ThreadPool
 import pandas as pd
 from dateutil.parser import parse as parse_datetime
-from sqlalchemy import func, null
+from sqlalchemy import func
 import numpy as np
 import mindsdb.interfaces.storage.db as db
 from mindsdb.utilities.config import Config
 from mindsdb.interfaces.model.functions import (
     get_model_record,
-    get_model_records
+    get_model_records,
+    get_project_record
 )
 from mindsdb.interfaces.storage.json import get_json_storage
 from mindsdb.interfaces.storage.model_fs import ModelStorage
@@ -151,11 +152,7 @@ class ModelController():
     def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
         from mindsdb.interfaces.database.database import DatabaseController
-        project_record = db.Project.query.filter(
-            (func.lower(db.Project.name) == func.lower(project_name))
-            & (db.Project.company_id == ctx.company_id)
-            & (db.Project.deleted_at == null())
-        ).first()
+        project_record = get_project_record(func.lower(project_name))
         if project_record is None:
             raise Exception(f"Project '{project_name}' does not exists")

MindsDB 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl