PyPI - MindsDB - Versions diffs - 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl - Mend

MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (77)

{MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
{MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
mindsdb/__about__.py +1 -1
mindsdb/__main__.py +5 -3
mindsdb/api/executor/__init__.py +0 -1
mindsdb/api/executor/command_executor.py +2 -1
mindsdb/api/executor/data_types/answer.py +1 -1
mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
mindsdb/api/executor/sql_query/__init__.py +1 -0
mindsdb/api/executor/sql_query/result_set.py +36 -21
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
mindsdb/api/executor/utilities/sql.py +2 -10
mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
mindsdb/api/http/namespaces/sql.py +3 -1
mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
mindsdb/integrations/utilities/rag/settings.py +8 -2
mindsdb/integrations/utilities/sql_utils.py +1 -1
mindsdb/interfaces/agents/agents_controller.py +3 -5
mindsdb/interfaces/agents/langchain_agent.py +112 -150
mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
mindsdb/interfaces/chatbot/memory.py +58 -13
mindsdb/interfaces/database/projects.py +17 -15
mindsdb/interfaces/database/views.py +12 -25
mindsdb/interfaces/knowledge_base/controller.py +39 -15
mindsdb/interfaces/model/functions.py +15 -4
mindsdb/interfaces/model/model_controller.py +4 -7
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
mindsdb/interfaces/skills/retrieval_tool.py +10 -3
mindsdb/interfaces/skills/skill_tool.py +97 -53
mindsdb/interfaces/skills/sql_agent.py +77 -36
mindsdb/interfaces/storage/db.py +1 -1
mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
mindsdb/utilities/cache.py +7 -4
mindsdb/utilities/context.py +11 -1
mindsdb/utilities/langfuse.py +264 -0
mindsdb/utilities/log.py +20 -2
mindsdb/utilities/otel/__init__.py +206 -0
mindsdb/utilities/otel/logger.py +25 -0
mindsdb/utilities/otel/meter.py +19 -0
mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
mindsdb/utilities/otel/tracer.py +16 -0
mindsdb/utilities/partitioning.py +52 -0
mindsdb/utilities/render/sqlalchemy_render.py +7 -1
mindsdb/utilities/utils.py +34 -0
mindsdb/utilities/otel.py +0 -72
{MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
{MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
{MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0

mindsdb/interfaces/chatbot/chatbot_task.py CHANGED Viewed

@@ -53,15 +53,23 @@ class ChatBotTask(BaseTask):
         chat_params = self.chat_handler.get_chat_config()
         polling = chat_params['polling']['type']
+        memory = chat_params['memory']['type'] if 'memory' in chat_params else None
+        memory_cls = None
+        if memory:
+            memory_cls = DBMemory if memory == 'db' else HandlerMemory
         if polling == 'message_count':
             chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
             self.chat_pooling = MessageCountPolling(self, chat_params)
-            self.memory = HandlerMemory(self, chat_params)
+            # The default type for message count polling is HandlerMemory if not specified.
+            self.memory = HandlerMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
         elif polling == 'realtime':
             chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
             self.chat_pooling = RealtimePolling(self, chat_params)
-            self.memory = DBMemory(self, chat_params)
+            # The default type for real-time polling is DBMemory if not specified.
+            self.memory = DBMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
         elif polling == 'webhook':
             self.chat_pooling = WebhookPolling(self, chat_params)
@@ -80,11 +88,11 @@ class ChatBotTask(BaseTask):
         self.chat_pooling.run(stop_event)
     def on_message(self, message: ChatBotMessage, chat_id=None, chat_memory=None, table_name=None):
-        if not chat_id and chat_memory:
+        if not chat_id and not chat_memory:
             raise Exception('chat_id or chat_memory should be provided')
         try:
-            self._on_holding_message(chat_id, table_name)
+            self._on_holding_message(chat_id, chat_memory, table_name)
             self._on_message(message, chat_id, chat_memory, table_name)
         except (SystemExit, KeyboardInterrupt):
             raise
@@ -93,15 +101,18 @@ class ChatBotTask(BaseTask):
             logger.error(error)
             self.set_error(str(error))
-    def _on_holding_message(self, chat_id: str, table_name: str = None):
+    def _on_holding_message(self, chat_id: str = None, chat_memory: BaseMemory = None, table_name: str = None):
         """
         Send a message to hold the user's attention while the bot is processing the request.
         This message will not be saved in the chat memory.
         Args:
             chat_id (str): The ID of the chat.
+            chat_memory (BaseMemory): The memory of the chat.
             table_name (str): The name of the table.
         """
+        chat_id = chat_id if chat_id else chat_memory.chat_id
         response_message = ChatBotMessage(
             ChatBotMessage.Type.DIRECT,
             HOLDING_MESSAGE,

mindsdb/interfaces/chatbot/memory.py CHANGED Viewed

@@ -1,9 +1,9 @@
+from typing import Union
 from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, OrderBy
 from mindsdb.interfaces.storage import db
 from .types import ChatBotMessage
@@ -60,7 +60,7 @@ class BaseMemory:
         # If the chat_id is a tuple, convert it to a string when storing the message in the database.
         self._add_to_history(
-            str(chat_id) if isinstance(chat_id, tuple) else chat_id,
+            chat_id,
             chat_message,
             table_name=table_name
         )
@@ -74,7 +74,7 @@ class BaseMemory:
         else:
             history = self._get_chat_history(
-                str(chat_id) if isinstance(chat_id, tuple) else chat_id,
+                chat_id,
                 table_name
             )
             self._cache[key] = history
@@ -108,18 +108,44 @@ class HandlerMemory(BaseMemory):
         time_col = t_params['time_col']
         chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']]
-        ast_query = Select(
-            targets=[Identifier(text_col),
-                     Identifier(username_col),
-                     Identifier(time_col)],
-            from_table=Identifier(t_params['name']),
-            where=[BinaryOperation(
+        chat_id = chat_id if isinstance(chat_id, tuple) else (chat_id,)
+        # Add a WHERE clause for each chat_id column.
+        where_conditions = [
+            BinaryOperation(
                 op='=',
                 args=[
                     Identifier(chat_id_col),
                     Constant(chat_id[idx])
                 ]
-            ) for idx, chat_id_col in enumerate(chat_id_cols)],
+            ) for idx, chat_id_col in enumerate(chat_id_cols)
+        ]
+        # Add a WHERE clause to ignore holding messages from the bot.
+        from .chatbot_task import HOLDING_MESSAGE
+        where_conditions.append(
+            BinaryOperation(
+                op='!=',
+                args=[
+                    Identifier(text_col),
+                    Constant(HOLDING_MESSAGE)
+                ]
+            )
+        )
+        # Convert the WHERE conditions to a BinaryOperation object.
+        where_conditions_binary_operation = None
+        for condition in where_conditions:
+            if where_conditions_binary_operation is None:
+                where_conditions_binary_operation = condition
+            else:
+                where_conditions_binary_operation = BinaryOperation('and', args=[where_conditions_binary_operation, condition])
+        ast_query = Select(
+            targets=[Identifier(text_col),
+                     Identifier(username_col),
+                     Identifier(time_col)],
+            from_table=Identifier(t_params['name']),
+            where=where_conditions_binary_operation,
             order_by=[OrderBy(Identifier(time_col))],
             limit=Constant(self.MAX_DEPTH),
         )
@@ -151,9 +177,28 @@ class DBMemory(BaseMemory):
     uses mindsdb database to store messages
     '''
+    def _generate_chat_id_for_db(self, chat_id: Union[str, tuple], table_name: str = None) -> str:
+        """
+        Generate an ID for the chat to store in the database.
+        The ID is a string that includes the components of the chat ID and the table name (if provided) separated by underscores.
+        Args:
+            chat_id (str | tuple): The ID of the chat.
+            table_name (str): The name of the table the chat belongs to.
+        Returns:
+            str: "<table_name>_<chat id components>" when table_name is given, otherwise only the chat id components.
+        """
+        if isinstance(chat_id, tuple):
+            chat_id_str = "_".join(str(val) for val in chat_id)
+        else:
+            chat_id_str = str(chat_id)
+        # The table name is optional: the result must be defined even without it,
+        # so the prefix is applied on top of an already valid ID.
+        if table_name:
+            chat_id_str = f"{table_name}_{chat_id_str}"
+        return chat_id_str
     def _add_to_history(self, chat_id, message, table_name=None):
         chat_bot_id = self.chat_task.bot_id
-        destination = str((chat_id, table_name)) if table_name else chat_id
+        destination = self._generate_chat_id_for_db(chat_id, table_name)
         message = db.ChatBotsHistory(
             chat_bot_id=chat_bot_id,
@@ -167,7 +212,7 @@ class DBMemory(BaseMemory):
     def _get_chat_history(self, chat_id, table_name=None):
         chat_bot_id = self.chat_task.bot_id
-        destination = str((chat_id, table_name)) if table_name else chat_id
+        destination = self._generate_chat_id_for_db(chat_id, table_name)
         query = db.ChatBotsHistory.query\
             .filter(

mindsdb/interfaces/database/projects.py CHANGED Viewed

@@ -24,19 +24,14 @@ class Project:
         p = Project()
         p.record = db_record
         p.name = db_record.name
-        p.company_id = db_record.company_id
+        p.company_id = ctx.company_id
         p.id = db_record.id
         return p
     def create(self, name: str):
         name = name.lower()
-        existing_record = db.Project.query.filter(
-            (sa.func.lower(db.Project.name) == name)
-            & (db.Project.company_id == ctx.company_id)
-            & (db.Project.deleted_at == sa.null())
-        ).first()
-        if existing_record is not None:
-            raise EntityExistsError('Project already exists', name)
+        company_id = ctx.company_id if ctx.company_id is not None else 0
         existing_record = db.Integration.query.filter(
             sa.func.lower(db.Integration.name) == name,
@@ -45,23 +40,28 @@ class Project:
         if existing_record is not None:
             raise EntityExistsError('Database exists with this name ', name)
+        existing_record = db.Project.query.filter(
+            (sa.func.lower(db.Project.name) == name)
+            & (db.Project.company_id == company_id)
+            & (db.Project.deleted_at == sa.null())
+        ).first()
+        if existing_record is not None:
+            raise EntityExistsError('Project already exists', name)
         record = db.Project(
             name=name,
-            company_id=ctx.company_id
+            company_id=company_id
         )
         self.record = record
         self.name = name
-        self.company_id = ctx.company_id
+        self.company_id = company_id
         db.session.add(record)
         db.session.commit()
         self.id = record.id
-    def save(self):
-        db.session.commit()
     def delete(self):
         tables = self.get_tables()
         tables = [key for key, val in tables.items() if val['type'] != 'table']
@@ -360,8 +360,9 @@ class ProjectController:
         pass
     def get_list(self) -> List[Project]:
+        company_id = ctx.company_id if ctx.company_id is not None else 0
         records = db.Project.query.filter(
-            (db.Project.company_id == ctx.company_id)
+            (db.Project.company_id == company_id)
             & (db.Project.deleted_at == sa.null())
         ).order_by(db.Project.name)
@@ -371,7 +372,8 @@ class ProjectController:
         if id is not None and name is not None:
             raise ValueError("Both 'id' and 'name' is None")
-        q = db.Project.query.filter_by(company_id=ctx.company_id)
+        company_id = ctx.company_id if ctx.company_id is not None else 0
+        q = db.Project.query.filter_by(company_id=company_id)
         if id is not None:
             q = q.filter_by(id=id)

mindsdb/interfaces/database/views.py CHANGED Viewed

@@ -3,6 +3,7 @@ from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.query_context.context_controller import query_context_controller
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
+from mindsdb.interfaces.model.functions import get_project_record, get_project_records
 class ViewController:
@@ -39,11 +40,8 @@ class ViewController:
     def update(self, name, query, project_name):
         name = name.lower()
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         rec = db.session.query(db.View).filter(
             func.lower(db.View.name) == name,
             db.View.company_id == ctx.company_id,
@@ -56,11 +54,8 @@ class ViewController:
     def delete(self, name, project_name):
         name = name.lower()
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         rec = db.session.query(db.View).filter(
             func.lower(db.View.name) == name,
             db.View.company_id == ctx.company_id,
@@ -74,17 +69,12 @@ class ViewController:
         query_context_controller.drop_query_context('view', rec.id)
     def list(self, project_name):
-        query = db.session.query(db.Project).filter_by(
-            company_id=ctx.company_id,
-            deleted_at=None
-        )
-        if project_name is not None:
-            query = query.filter_by(name=project_name)
-        project_names = {
-            i.id: i.name
-            for i in query
-        }
+        project_names = {}
+        for project in get_project_records():
+            if project_name is not None and project.name != project_name:
+                continue
+            project_names[project.id] = project.name
         query = db.session.query(db.View).filter(
             db.View.company_id == ctx.company_id,
@@ -112,11 +102,8 @@ class ViewController:
         }
     def get(self, id=None, name=None, project_name=None):
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         if id is not None:
             records = db.session.query(db.View).filter_by(
                 id=id,

mindsdb/interfaces/knowledge_base/controller.py CHANGED Viewed

@@ -52,6 +52,7 @@ class KnowledgeBaseTable:
         self.session = session
         self.document_preprocessor = None
         self.document_loader = None
+        self.model_params = None
     def configure_preprocessing(self, config: Optional[dict] = None):
         """Configure preprocessing for the knowledge base table"""
@@ -488,6 +489,7 @@ class KnowledgeBaseTable:
         df_out = project_datanode.predict(
             model_name=model_rec.name,
             df=df,
+            params=self.model_params
         )
         target = model_rec.to_predict[0]
@@ -642,11 +644,13 @@ class KnowledgeBaseController:
             storage: Identifier,
             params: dict,
             preprocessing_config: Optional[dict] = None,
-            if_not_exists: bool = False,
+            if_not_exists: bool = False
     ) -> db.KnowledgeBase:
         """
         Add a new knowledge base to the database
         :param preprocessing_config: Optional preprocessing configuration to validate and store
+        :param is_sparse: Whether to use sparse vectors for embeddings
+        :param vector_size: Optional size specification for vectors, required when is_sparse=True
         """
         # Validate preprocessing config first if provided
         if preprocessing_config is not None:
@@ -654,6 +658,12 @@ class KnowledgeBaseController:
             params = params or {}
             params['preprocessing'] = preprocessing_config
+        # Check if vector_size is provided when using sparse vectors
+        is_sparse = params.get('is_sparse')
+        vector_size = params.get('vector_size')
+        if is_sparse and vector_size is None:
+            raise ValueError("vector_size is required when is_sparse=True")
         # get project id
         project = self.session.database_controller.get_project(project_name)
         project_id = project.id
@@ -693,7 +703,20 @@ class KnowledgeBaseController:
             cloud_pg_vector = os.environ.get('KB_PGVECTOR_URL')
             if cloud_pg_vector:
                 vector_table_name = name
-                vector_db_name = self._create_persistent_pgvector()
+                # Add sparse vector support for pgvector
+                vector_db_params = {}
+                # Check both explicit parameter and model configuration
+                is_sparse = is_sparse or model_record.learn_args.get('using', {}).get('sparse')
+                if is_sparse:
+                    vector_db_params['is_sparse'] = True
+                    if vector_size is not None:
+                        vector_db_params['vector_size'] = vector_size
+                vector_db_name = self._create_persistent_pgvector(vector_db_params)
+                # create table in vectordb before creating KB
+                self.session.datahub.get(vector_db_name).integration_handler.create_table(
+                    vector_table_name
+                )
             else:
                 # create chroma db with same name
                 vector_table_name = "default_collection"
@@ -707,15 +730,14 @@ class KnowledgeBaseController:
         vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
-        # create table in vectordb
-        if model_record.learn_args.get('using', {}).get('sparse') is not None:
-            self.session.datahub.get(vector_db_name).integration_handler.create_table(
-                vector_table_name, sparse=model_record.learn_args.get('using', {}).get('sparse')
-            )
-        else:
-            self.session.datahub.get(vector_db_name).integration_handler.create_table(
-                vector_table_name
-            )
+        # Store sparse vector settings in params if specified
+        if is_sparse:
+            params = params or {}
+            params['vector_config'] = {
+                'is_sparse': is_sparse
+            }
+            if vector_size is not None:
+                params['vector_config']['vector_size'] = vector_size
         kb = db.KnowledgeBase(
             name=name,
@@ -729,16 +751,15 @@ class KnowledgeBaseController:
         db.session.commit()
         return kb
-    def _create_persistent_pgvector(self):
+    def _create_persistent_pgvector(self, params=None):
         """Create default vector database for knowledge base, if not specified"""
         vector_store_name = "kb_pgvector_store"
         # check if exists
         if self.session.integration_controller.get(vector_store_name):
             return vector_store_name
-        self.session.integration_controller.add(vector_store_name, 'pgvector', {})
+        self.session.integration_controller.add(vector_store_name, 'pgvector', params or {})
         return vector_store_name
     def _create_persistent_chroma(self, kb_name, engine="chromadb"):
@@ -840,16 +861,19 @@ class KnowledgeBaseController:
         )
         return kb
-    def get_table(self, name: str, project_id: int) -> KnowledgeBaseTable:
+    def get_table(self, name: str, project_id: int, params: dict = None) -> KnowledgeBaseTable:
         """
         Returns kb table object with properly configured preprocessing
         :param name: table name
         :param project_id: project id
+        :param params: runtime parameters for KB. Keys: 'model' - parameters for embedding model
         :return: kb table object
         """
         kb = self.get(name, project_id)
         if kb is not None:
             table = KnowledgeBaseTable(kb, self.session)
+            if params:
+                table.model_params = params.get('model')
             # Always configure preprocessing - either from params or default
             if kb.params and 'preprocessing' in kb.params:

mindsdb/interfaces/model/functions.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, List
 from sqlalchemy import null, func
@@ -41,9 +41,7 @@ def get_integration_record(name: str) -> db.Integration:
 @profiler.profile()
 def get_project_record(name: str) -> db.Project:
-    company_id = ctx.company_id
-    if company_id is None:
-        company_id = null()
+    company_id = ctx.company_id if ctx.company_id is not None else 0
     project_record = (
         db.session.query(db.Project)
@@ -56,6 +54,19 @@ def get_project_record(name: str) -> db.Project:
     return project_record
+@profiler.profile()
+def get_project_records() -> List[db.Project]:
+    """Return every project record of the current company whose ``deleted_at`` is NULL.
+    When ``ctx.company_id`` is None, company id 0 is used for the lookup.
+    """
+    company_id = ctx.company_id if ctx.company_id is not None else 0
+    return (
+        db.session.query(db.Project)
+        .filter(
+            (db.Project.company_id == company_id)
+            & (db.Project.deleted_at == null())
+        ).all()
+    )
 @profiler.profile()
 def get_predictor_integration(record: db.Predictor) -> db.Integration:
     integration_record = (

mindsdb/interfaces/model/model_controller.py CHANGED Viewed

@@ -7,14 +7,15 @@ from multiprocessing.pool import ThreadPool
 import pandas as pd
 from dateutil.parser import parse as parse_datetime
-from sqlalchemy import func, null
+from sqlalchemy import func
 import numpy as np
 import mindsdb.interfaces.storage.db as db
 from mindsdb.utilities.config import Config
 from mindsdb.interfaces.model.functions import (
     get_model_record,
-    get_model_records
+    get_model_records,
+    get_project_record
 )
 from mindsdb.interfaces.storage.json import get_json_storage
 from mindsdb.interfaces.storage.model_fs import ModelStorage
@@ -151,11 +152,7 @@ class ModelController():
     def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
         from mindsdb.interfaces.database.database import DatabaseController
-        project_record = db.Project.query.filter(
-            (func.lower(db.Project.name) == func.lower(project_name))
-            & (db.Project.company_id == ctx.company_id)
-            & (db.Project.deleted_at == null())
-        ).first()
+        project_record = get_project_record(func.lower(project_name))
         if project_record is None:
             raise Exception(f"Project '{project_name}' does not exists")

mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List
+from textwrap import dedent
 from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
 from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
@@ -11,7 +12,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
     def get_tools(self, prefix='') -> List[BaseTool]:
         """Get the tools in the toolkit."""
-        list_sql_database_tool = ListSQLDatabaseTool(name=f'sql_db_list_tables{prefix}', db=self.db)
+        list_sql_database_tool = ListSQLDatabaseTool(
+            name=f'sql_db_list_tables{prefix}',
+            db=self.db,
+            description=(
+                "Input is an empty string, output is a comma-separated list of tables in the database. "
+                "Each table name in the list may be in one of two formats: database_name.table_name or "
+                "database_name.schema_name.table_name."
+            )
+        )
         info_sql_database_tool_description = (
             "Input: A comma-separated list of tables. Output: Schema and sample rows for those tables. "
@@ -25,43 +34,43 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             db=self.db, description=info_sql_database_tool_description
         )
-        query_sql_database_tool_description = (
-            "Input: A detailed SQL query. Output: Database result or error message. "
-            "For errors, rewrite and retry the query. For 'Unknown column' errors, use "
-            f"{info_sql_database_tool.name} to check table fields. "
-            "This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases. "
-            "Follow these instructions with utmost precision: "
-            "1. Query Output Format: "
-            "   - Always return results in well-formatted **Markdown tables**. "
-            "   - Ensure clarity and proper structure for easy readability. "
-            "2. Sample Data: "
-            "   - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers. "
-            "3. Categorical Data: "
-            "   - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first. "
-            "   - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses. "
-            "4. Result Limiting and Counting: "
-            "   - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`. "
-            "   - If the count is greater than 10, limit the query to return only 10 results initially. "
-            "   - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results. "
-            "   - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped. "
-            "5. Date Handling: "
-            "   - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date. "
-            "   - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..` "
-            "   - Do not compare date values without casting columns to date. "
-            "   - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples: "
-            "     SELECT NOW() + INTERVAL 5 DAY; "
-            "     SELECT NOW() - INTERVAL 3 HOUR; "
-            "     SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY; "
-            "     SELECT NOW() - INTERVAL 1 YEAR; "
-            "6. Query Best Practices: "
-            "   - Query only necessary columns, not all. "
-            "   - Use only existing column names from correct tables. "
-            "   - Use database-specific syntax for date operations. "
-            "7. Error Handling: "
-            "   - For errors, rewrite and retry the query. "
-            "   - For 'Unknown column' errors, check table fields using info_sql_database_tool. "
-            "Adhere to these guidelines for all queries and responses. Ask for clarification if needed."
-        )
+        query_sql_database_tool_description = dedent(f"""\
+            Input: A detailed SQL query.
+            Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
+            This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
+            Follow these instructions with utmost precision:
+            1. Query Output Format:
+               - Always return results in well-formatted **Markdown tables**.
+               - Ensure clarity and proper structure for easy readability.
+            2. Sample Data:
+               - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
+            3. Categorical Data:
+               - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first.
+               - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses.
+            4. Result Limiting and Counting:
+               - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`.
+               - If the count is greater than 10, limit the query to return only 10 results initially.
+               - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
+               - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
+            5. Date Handling:
+               - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
+               - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
+               - Do not compare date values without casting columns to date.
+               - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
+                 SELECT NOW() + INTERVAL 5 DAY;
+                 SELECT NOW() - INTERVAL 3 HOUR;
+                 SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
+                 SELECT NOW() - INTERVAL 1 YEAR;
+            6. Query Best Practices:
+               - Always send only one query at a time.
+               - Query only necessary columns, not all.
+               - Use only existing column names from correct tables.
+               - Use database-specific syntax for date operations.
+            7. Error Handling:
+               - For errors, rewrite and retry the query.
+               - For 'Unknown column' errors, check table fields using info_sql_database_tool.
+            Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
+        """)
         query_sql_database_tool = QuerySQLDataBaseTool(
             name=f'sql_db_query{prefix}',

mindsdb/interfaces/skills/retrieval_tool.py CHANGED Viewed

@@ -43,10 +43,17 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
             raise ValueError(f"Knowledge base not found: {kb_name}")
         kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
+        vector_store_config = {
+            'kb_table': kb_table
+        }
+        is_sparse = tools_config.pop('is_sparse', None)
+        vector_size = tools_config.pop('vector_size', None)
+        if is_sparse is not None:
+            vector_store_config['is_sparse'] = is_sparse
+        if vector_size is not None:
+            vector_store_config['vector_size'] = vector_size
         kb_params = {
-            'vector_store_config': {
-                'kb_table': kb_table
-            }
+            'vector_store_config': vector_store_config
         }
         # Get embedding model from knowledge base table

MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl