PyPI - MindsDB - Versions diffs - 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl - Mend

MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (55) hide show

mindsdb/interfaces/chatbot/memory.py CHANGED Viewed

@@ -1,9 +1,9 @@
+from typing import Union
 from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, OrderBy
 from mindsdb.interfaces.storage import db
 from .types import ChatBotMessage
@@ -60,7 +60,7 @@ class BaseMemory:
         # If the chat_id is a tuple, convert it to a string when storing the message in the database.
         self._add_to_history(
-            str(chat_id) if isinstance(chat_id, tuple) else chat_id,
+            chat_id,
             chat_message,
             table_name=table_name
         )
@@ -74,7 +74,7 @@ class BaseMemory:
         else:
             history = self._get_chat_history(
-                str(chat_id) if isinstance(chat_id, tuple) else chat_id,
+                chat_id,
                 table_name
             )
             self._cache[key] = history
@@ -108,18 +108,44 @@ class HandlerMemory(BaseMemory):
         time_col = t_params['time_col']
         chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']]
-        ast_query = Select(
-            targets=[Identifier(text_col),
-                     Identifier(username_col),
-                     Identifier(time_col)],
-            from_table=Identifier(t_params['name']),
-            where=[BinaryOperation(
+        chat_id = chat_id if isinstance(chat_id, tuple) else (chat_id,)
+        # Add a WHERE clause for each chat_id column.
+        where_conditions = [
+            BinaryOperation(
                 op='=',
                 args=[
                     Identifier(chat_id_col),
                     Constant(chat_id[idx])
                 ]
-            ) for idx, chat_id_col in enumerate(chat_id_cols)],
+            ) for idx, chat_id_col in enumerate(chat_id_cols)
+        ]
+        # Add a WHERE clause to ignore holding messages from the bot.
+        from .chatbot_task import HOLDING_MESSAGE
+        where_conditions.append(
+            BinaryOperation(
+                op='!=',
+                args=[
+                    Identifier(text_col),
+                    Constant(HOLDING_MESSAGE)
+                ]
+            )
+        )
+        # Convert the WHERE conditions to a BinaryOperation object.
+        where_conditions_binary_operation = None
+        for condition in where_conditions:
+            if where_conditions_binary_operation is None:
+                where_conditions_binary_operation = condition
+            else:
+                where_conditions_binary_operation = BinaryOperation('and', args=[where_conditions_binary_operation, condition])
+        ast_query = Select(
+            targets=[Identifier(text_col),
+                     Identifier(username_col),
+                     Identifier(time_col)],
+            from_table=Identifier(t_params['name']),
+            where=where_conditions_binary_operation,
             order_by=[OrderBy(Identifier(time_col))],
             limit=Constant(self.MAX_DEPTH),
         )
@@ -151,9 +177,28 @@ class DBMemory(BaseMemory):
     uses mindsdb database to store messages
     '''
+    def _generate_chat_id_for_db(self, chat_id: Union[str, tuple], table_name: str = None) -> str:
+        """
+        Generate an ID for the chat to store in the database.
+        The ID is a string that includes the components of the chat ID and the table name (if provided) separated by underscores.
+        Args:
+            chat_id (str | tuple): The ID of the chat.
+            table_name (str): The name of the table the chat belongs to.
+        """
+        if isinstance(chat_id, tuple):
+            char_id_str = "_".join(str(val) for val in chat_id)
+        else:
+            char_id_str = str(chat_id)
+        if table_name:
+            chat_id_str = f"{table_name}_{char_id_str}"
+        return chat_id_str
     def _add_to_history(self, chat_id, message, table_name=None):
         chat_bot_id = self.chat_task.bot_id
-        destination = str((chat_id, table_name)) if table_name else chat_id
+        destination = self._generate_chat_id_for_db(chat_id, table_name)
         message = db.ChatBotsHistory(
             chat_bot_id=chat_bot_id,
@@ -167,7 +212,7 @@ class DBMemory(BaseMemory):
     def _get_chat_history(self, chat_id, table_name=None):
         chat_bot_id = self.chat_task.bot_id
-        destination = str((chat_id, table_name)) if table_name else chat_id
+        destination = self._generate_chat_id_for_db(chat_id, table_name)
         query = db.ChatBotsHistory.query\
             .filter(

mindsdb/interfaces/database/projects.py CHANGED Viewed

@@ -24,19 +24,14 @@ class Project:
         p = Project()
         p.record = db_record
         p.name = db_record.name
-        p.company_id = db_record.company_id
+        p.company_id = ctx.company_id
         p.id = db_record.id
         return p
     def create(self, name: str):
         name = name.lower()
-        existing_record = db.Project.query.filter(
-            (sa.func.lower(db.Project.name) == name)
-            & (db.Project.company_id == ctx.company_id)
-            & (db.Project.deleted_at == sa.null())
-        ).first()
-        if existing_record is not None:
-            raise EntityExistsError('Project already exists', name)
+        company_id = ctx.company_id if ctx.company_id is not None else 0
         existing_record = db.Integration.query.filter(
             sa.func.lower(db.Integration.name) == name,
@@ -45,23 +40,28 @@ class Project:
         if existing_record is not None:
             raise EntityExistsError('Database exists with this name ', name)
+        existing_record = db.Project.query.filter(
+            (sa.func.lower(db.Project.name) == name)
+            & (db.Project.company_id == company_id)
+            & (db.Project.deleted_at == sa.null())
+        ).first()
+        if existing_record is not None:
+            raise EntityExistsError('Project already exists', name)
         record = db.Project(
             name=name,
-            company_id=ctx.company_id
+            company_id=company_id
         )
         self.record = record
         self.name = name
-        self.company_id = ctx.company_id
+        self.company_id = company_id
         db.session.add(record)
         db.session.commit()
         self.id = record.id
-    def save(self):
-        db.session.commit()
     def delete(self):
         tables = self.get_tables()
         tables = [key for key, val in tables.items() if val['type'] != 'table']
@@ -360,8 +360,9 @@ class ProjectController:
         pass
     def get_list(self) -> List[Project]:
+        company_id = ctx.company_id if ctx.company_id is not None else 0
         records = db.Project.query.filter(
-            (db.Project.company_id == ctx.company_id)
+            (db.Project.company_id == company_id)
             & (db.Project.deleted_at == sa.null())
         ).order_by(db.Project.name)
@@ -371,7 +372,8 @@ class ProjectController:
         if id is not None and name is not None:
             raise ValueError("Both 'id' and 'name' is None")
-        q = db.Project.query.filter_by(company_id=ctx.company_id)
+        company_id = ctx.company_id if ctx.company_id is not None else 0
+        q = db.Project.query.filter_by(company_id=company_id)
         if id is not None:
             q = q.filter_by(id=id)

mindsdb/interfaces/database/views.py CHANGED Viewed

@@ -3,6 +3,7 @@ from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.query_context.context_controller import query_context_controller
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
+from mindsdb.interfaces.model.functions import get_project_record, get_project_records
 class ViewController:
@@ -39,11 +40,8 @@ class ViewController:
     def update(self, name, query, project_name):
         name = name.lower()
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         rec = db.session.query(db.View).filter(
             func.lower(db.View.name) == name,
             db.View.company_id == ctx.company_id,
@@ -56,11 +54,8 @@ class ViewController:
     def delete(self, name, project_name):
         name = name.lower()
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         rec = db.session.query(db.View).filter(
             func.lower(db.View.name) == name,
             db.View.company_id == ctx.company_id,
@@ -74,17 +69,12 @@ class ViewController:
         query_context_controller.drop_query_context('view', rec.id)
     def list(self, project_name):
-        query = db.session.query(db.Project).filter_by(
-            company_id=ctx.company_id,
-            deleted_at=None
-        )
-        if project_name is not None:
-            query = query.filter_by(name=project_name)
-        project_names = {
-            i.id: i.name
-            for i in query
-        }
+        project_names = {}
+        for project in get_project_records():
+            if project_name is not None and project.name != project_name:
+                continue
+            project_names[project.id] = project.name
         query = db.session.query(db.View).filter(
             db.View.company_id == ctx.company_id,
@@ -112,11 +102,8 @@ class ViewController:
         }
     def get(self, id=None, name=None, project_name=None):
-        project_record = db.session.query(db.Project).filter_by(
-            name=project_name,
-            company_id=ctx.company_id,
-            deleted_at=None
-        ).first()
+        project_record = get_project_record(project_name)
         if id is not None:
             records = db.session.query(db.View).filter_by(
                 id=id,

mindsdb/interfaces/knowledge_base/controller.py CHANGED Viewed

@@ -52,6 +52,7 @@ class KnowledgeBaseTable:
         self.session = session
         self.document_preprocessor = None
         self.document_loader = None
+        self.model_params = None
     def configure_preprocessing(self, config: Optional[dict] = None):
         """Configure preprocessing for the knowledge base table"""
@@ -488,6 +489,7 @@ class KnowledgeBaseTable:
         df_out = project_datanode.predict(
             model_name=model_rec.name,
             df=df,
+            params=self.model_params
         )
         target = model_rec.to_predict[0]
@@ -859,16 +861,19 @@ class KnowledgeBaseController:
         )
         return kb
-    def get_table(self, name: str, project_id: int) -> KnowledgeBaseTable:
+    def get_table(self, name: str, project_id: int, params: dict = None) -> KnowledgeBaseTable:
         """
         Returns kb table object with properly configured preprocessing
         :param name: table name
         :param project_id: project id
+        :param params: runtime parameters for KB. Keys: 'model' - parameters for embedding model
         :return: kb table object
         """
         kb = self.get(name, project_id)
         if kb is not None:
             table = KnowledgeBaseTable(kb, self.session)
+            if params:
+                table.model_params = params.get('model')
             # Always configure preprocessing - either from params or default
             if kb.params and 'preprocessing' in kb.params:

mindsdb/interfaces/model/functions.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, List
 from sqlalchemy import null, func
@@ -41,9 +41,7 @@ def get_integration_record(name: str) -> db.Integration:
 @profiler.profile()
 def get_project_record(name: str) -> db.Project:
-    company_id = ctx.company_id
-    if company_id is None:
-        company_id = null()
+    company_id = ctx.company_id if ctx.company_id is not None else 0
     project_record = (
         db.session.query(db.Project)
@@ -56,6 +54,19 @@ def get_project_record(name: str) -> db.Project:
     return project_record
+@profiler.profile()
+def get_project_records() -> List[db.Project]:
+    company_id = ctx.company_id if ctx.company_id is not None else 0
+    return (
+        db.session.query(db.Project)
+        .filter(
+            (db.Project.company_id == company_id)
+            & (db.Project.deleted_at == null())
+        ).all()
+    )
 @profiler.profile()
 def get_predictor_integration(record: db.Predictor) -> db.Integration:
     integration_record = (

mindsdb/interfaces/model/model_controller.py CHANGED Viewed

@@ -7,14 +7,15 @@ from multiprocessing.pool import ThreadPool
 import pandas as pd
 from dateutil.parser import parse as parse_datetime
-from sqlalchemy import func, null
+from sqlalchemy import func
 import numpy as np
 import mindsdb.interfaces.storage.db as db
 from mindsdb.utilities.config import Config
 from mindsdb.interfaces.model.functions import (
     get_model_record,
-    get_model_records
+    get_model_records,
+    get_project_record
 )
 from mindsdb.interfaces.storage.json import get_json_storage
 from mindsdb.interfaces.storage.model_fs import ModelStorage
@@ -151,11 +152,7 @@ class ModelController():
     def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
         from mindsdb.interfaces.database.database import DatabaseController
-        project_record = db.Project.query.filter(
-            (func.lower(db.Project.name) == func.lower(project_name))
-            & (db.Project.company_id == ctx.company_id)
-            & (db.Project.deleted_at == null())
-        ).first()
+        project_record = get_project_record(func.lower(project_name))
         if project_record is None:
             raise Exception(f"Project '{project_name}' does not exists")

mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List
+from textwrap import dedent
 from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
 from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
@@ -11,7 +12,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
     def get_tools(self, prefix='') -> List[BaseTool]:
         """Get the tools in the toolkit."""
-        list_sql_database_tool = ListSQLDatabaseTool(name=f'sql_db_list_tables{prefix}', db=self.db)
+        list_sql_database_tool = ListSQLDatabaseTool(
+            name=f'sql_db_list_tables{prefix}',
+            db=self.db,
+            description=(
+                "Input is an empty string, output is a comma-separated list of tables in the database. "
+                "Each table name in the list may be in one of two formats: database_name.table_name or "
+                "database_name.schema_name.table_name."
+            )
+        )
         info_sql_database_tool_description = (
             "Input: A comma-separated list of tables. Output: Schema and sample rows for those tables. "
@@ -25,43 +34,43 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             db=self.db, description=info_sql_database_tool_description
         )
-        query_sql_database_tool_description = (
-            "Input: A detailed SQL query. Output: Database result or error message. "
-            "For errors, rewrite and retry the query. For 'Unknown column' errors, use "
-            f"{info_sql_database_tool.name} to check table fields. "
-            "This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases. "
-            "Follow these instructions with utmost precision: "
-            "1. Query Output Format: "
-            "   - Always return results in well-formatted **Markdown tables**. "
-            "   - Ensure clarity and proper structure for easy readability. "
-            "2. Sample Data: "
-            "   - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers. "
-            "3. Categorical Data: "
-            "   - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first. "
-            "   - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses. "
-            "4. Result Limiting and Counting: "
-            "   - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`. "
-            "   - If the count is greater than 10, limit the query to return only 10 results initially. "
-            "   - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results. "
-            "   - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped. "
-            "5. Date Handling: "
-            "   - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date. "
-            "   - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..` "
-            "   - Do not compare date values without casting columns to date. "
-            "   - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples: "
-            "     SELECT NOW() + INTERVAL 5 DAY; "
-            "     SELECT NOW() - INTERVAL 3 HOUR; "
-            "     SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY; "
-            "     SELECT NOW() - INTERVAL 1 YEAR; "
-            "6. Query Best Practices: "
-            "   - Query only necessary columns, not all. "
-            "   - Use only existing column names from correct tables. "
-            "   - Use database-specific syntax for date operations. "
-            "7. Error Handling: "
-            "   - For errors, rewrite and retry the query. "
-            "   - For 'Unknown column' errors, check table fields using info_sql_database_tool. "
-            "Adhere to these guidelines for all queries and responses. Ask for clarification if needed."
-        )
+        query_sql_database_tool_description = dedent(f"""\
+            Input: A detailed SQL query.
+            Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
+            This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
+            Follow these instructions with utmost precision:
+            1. Query Output Format:
+               - Always return results in well-formatted **Markdown tables**.
+               - Ensure clarity and proper structure for easy readability.
+            2. Sample Data:
+               - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
+            3. Categorical Data:
+               - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first.
+               - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses.
+            4. Result Limiting and Counting:
+               - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`.
+               - If the count is greater than 10, limit the query to return only 10 results initially.
+               - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
+               - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
+            5. Date Handling:
+               - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
+               - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
+               - Do not compare date values without casting columns to date.
+               - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
+                 SELECT NOW() + INTERVAL 5 DAY;
+                 SELECT NOW() - INTERVAL 3 HOUR;
+                 SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
+                 SELECT NOW() - INTERVAL 1 YEAR;
+            6. Query Best Practices:
+               - Always send only one query at a time.
+               - Query only necessary columns, not all.
+               - Use only existing column names from correct tables.
+               - Use database-specific syntax for date operations.
+            7. Error Handling:
+               - For errors, rewrite and retry the query.
+               - For 'Unknown column' errors, check table fields using info_sql_database_tool.
+            Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
+        """)
         query_sql_database_tool = QuerySQLDataBaseTool(
             name=f'sql_db_query{prefix}',

MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl