PyPI - MindsDB - Versions diffs - 25.3.2.0__py3-none-any.whl → 25.3.4.0__py3-none-any.whl - Mend

MindsDB 25.3.2.0__py3-none-any.whl → 25.3.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (45) hide show

mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py ADDED Viewed

@@ -0,0 +1,431 @@
+from typing import List
+import pandas as pd
+from mindsdb.integrations.handlers.ms_teams_handler.ms_graph_api_teams_client import MSGraphAPITeamsDelegatedPermissionsClient
+from mindsdb.integrations.libs.api_handler import APIResource
+from mindsdb.integrations.utilities.sql_utils import (
+    FilterCondition,
+    FilterOperator,
+    SortColumn
+)
+class TeamsTable(APIResource):
+    """
+    The table abstraction for the 'teams' resource of the Microsoft Graph API.
+    """
+    def list(
+        self,
+        conditions: List[FilterCondition] = None,
+        limit: int = None,
+        sort: List[SortColumn] = None,
+        targets: List[str] = None,
+        **kwargs
+    ):
+        """
+        Executes a parsed SELECT SQL query on the 'teams' resource of the Microsoft Graph API.
+        Args:
+            conditions (List[FilterCondition]): The list of parsed filter conditions.
+            limit (int): The maximum number of records to return.
+            sort (List[SortColumn]): The list of parsed sort columns.
+            targets (List[str]): The list of target columns to return.
+        """
+        client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect()
+        teams = client.get_all_groups()
+        teams_df = pd.json_normalize(teams, sep="_")
+        teams_df = teams_df[self.get_columns()]
+        return teams_df
+    def get_columns(self) -> List[str]:
+        """
+        Retrieves the attributes (columns) of the 'teams' resource.
+        Returns:
+            List[Text]: A list of attributes (columns) of the 'teams' resource.
+        """
+        return [
+            "id",
+            "createdDateTime",
+            "displayName",
+            "description",
+            "internalId",
+            "classification",
+            "specialization",
+            "visibility",
+            "webUrl",
+            "isArchived",
+            "tenantId",
+            "isMembershipLimitedToOwners",
+        ]
+class ChannelsTable(APIResource):
+    """
+    The table abstraction for the 'channels' resource of the Microsoft Graph API.
+    """
+    def list(
+        self,
+        conditions: List[FilterCondition] = None,
+        limit: int = None,
+        sort: List[SortColumn] = None,
+        targets: List[str] = None,
+        **kwargs
+    ):
+        """
+        Executes a parsed SELECT SQL query on the 'channels' resource of the Microsoft Graph API.
+        Args:
+            conditions (List[FilterCondition]): The list of parsed filter conditions.
+            limit (int): The maximum number of records to return.
+            sort (List[SortColumn]): The list of parsed sort columns.
+            targets (List[str]): The list of target columns to return.
+        """
+        client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect()
+        channels = []
+        team_id, channel_ids = None, None
+        for condition in conditions:
+            if condition.column == "teamId":
+                if condition.op == FilterOperator.EQUAL:
+                    team_id = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'teamId'."
+                    )
+                condition.applied = True
+            if condition.column == "id":
+                if condition.op == FilterOperator.EQUAL:
+                    channel_ids = [condition.value]
+                elif condition.op == FilterOperator.IN:
+                    channel_ids = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'id'."
+                    )
+                condition.applied = True
+        if team_id:
+            if channel_ids:
+                channels = client.get_channels_in_group_by_ids(team_id, channel_ids)
+            else:
+                channels = client.get_all_channels_in_group(team_id)
+        elif channel_ids:
+            channels = client.get_channels_across_all_groups_by_ids(channel_ids)
+        else:
+            channels = client.get_all_channels_across_all_groups()
+        channels_df = pd.json_normalize(channels, sep="_")
+        channels_df = channels_df[self.get_columns()]
+        return channels_df
+    def get_columns(self) -> List[str]:
+        """
+        Retrieves the attributes (columns) of the 'chats' resource.
+        Returns:
+            List[Text]: A list of attributes (columns) of the 'chats' resource.
+        """
+        return [
+            "id",
+            "createdDateTime",
+            "displayName",
+            "description",
+            "isFavoriteByDefault",
+            "email",
+            "tenantId",
+            "webUrl",
+            "membershipType",
+            "teamId",
+        ]
+class ChannelMessagesTable(APIResource):
+    """
+    The table abstraction for the 'channel messages' resource of the Microsoft Graph API.
+    """
+    def list(
+        self,
+        conditions: List[FilterCondition] = None,
+        limit: int = None,
+        sort: List[SortColumn] = None,
+        targets: List[str] = None,
+        **kwargs
+    ):
+        """
+        Executes a parsed SELECT SQL query on the 'channel messages' resource of the Microsoft Graph API.
+        Args:
+            conditions (List[FilterCondition]): The list of parsed filter conditions.
+            limit (int): The maximum number of records to return.
+            sort (List[SortColumn]): The list of parsed sort columns.
+            targets (List[str]): The list of target columns to return.
+        """
+        client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect()
+        messages = []
+        group_id, channel_id, message_ids = None, None, None
+        for condition in conditions:
+            if condition.column == "channelIdentity_teamId":
+                if condition.op == FilterOperator.EQUAL:
+                    group_id = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'channelIdentity_teamId'."
+                    )
+                condition.applied = True
+            if condition.column == "channelIdentity_channelId":
+                if condition.op == FilterOperator.EQUAL:
+                    channel_id = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'channelIdentity_channelId'."
+                    )
+                condition.applied = True
+            if condition.column == "id":
+                if condition.op == FilterOperator.EQUAL:
+                    message_ids = [condition.value]
+                elif condition.op == FilterOperator.IN:
+                    message_ids = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'id'."
+                    )
+                condition.applied = True
+        if not group_id or not channel_id:
+            raise ValueError("The 'channelIdentity_teamId' and 'channelIdentity_channelId' columns are required.")
+        if message_ids:
+            messages = client.get_messages_in_channel_by_ids(group_id, channel_id, message_ids)
+        else:
+            messages = client.get_all_messages_in_channel(group_id, channel_id, limit)
+        messages_df = pd.json_normalize(messages, sep="_")
+        messages_df = messages_df[self.get_columns()]
+        return messages_df
+    def get_columns(self) -> List[str]:
+        """
+        Retrieves the attributes (columns) of the 'chat messages' resource.
+        Returns:
+            List[Text]: A list of attributes (columns) of the 'chat messages' resource.
+        """
+        return [
+            "id",
+            "replyToId",
+            "etag",
+            "messageType",
+            "createdDateTime",
+            "lastModifiedDateTime",
+            "lastEditedDateTime",
+            "deletedDateTime",
+            "subject",
+            "summary",
+            "chatId",
+            "importance",
+            "locale",
+            "webUrl",
+            "policyViolation",
+            "from_application",
+            "from_device",
+            "from_user_id",
+            "from_user_displayName",
+            "from_user_userIdentityType",
+            "body_contentType",
+            "body_content",
+            "channelIdentity_teamId",
+            "channelIdentity_channelId",
+        ]
+class ChatsTable(APIResource):
+    """
+    The table abstraction for the 'chats' resource of the Microsoft Graph API.
+    """
+    def list(
+        self,
+        conditions: List[FilterCondition] = None,
+        limit: int = None,
+        sort: List[SortColumn] = None,
+        targets: List[str] = None,
+        **kwargs
+    ):
+        """
+        Executes a parsed SELECT SQL query on the 'chats' resource of the Microsoft Graph API.
+        Args:
+            conditions (List[FilterCondition]): The list of parsed filter conditions.
+            limit (int): The maximum number of records to return.
+            sort (List[SortColumn]): The list of parsed sort columns.
+            targets (List[str]): The list of target columns to return.
+        """
+        client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect()
+        chats = []
+        chat_ids = None
+        for condition in conditions:
+            if condition.column == "id":
+                if condition.op == FilterOperator.EQUAL:
+                    chat_ids = [condition.value]
+                elif condition.op == FilterOperator.IN:
+                    chat_ids = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'id'."
+                    )
+                condition.applied = True
+        if chat_ids:
+            chats = client.get_chats_by_ids(chat_ids)
+        else:
+            chats = client.get_all_chats(limit)
+        chats_df = pd.json_normalize(chats, sep="_")
+        chats_df = chats_df[self.get_columns()]
+        return chats_df
+    def get_columns(self) -> List[str]:
+        """
+        Retrieves the attributes (columns) of the 'chats' resource.
+        Returns:
+            List[Text]: A list of attributes (columns) of the 'chats' resource.
+        """
+        return [
+            "id",
+            "topic",
+            "createdDateTime",
+            "lastUpdatedDateTime",
+            "chatType",
+            "webUrl",
+            "isHiddenForAllMembers"
+        ]
+class ChatMessagesTable(APIResource):
+    """
+    The table abstraction for the 'chat messages' resource of the Microsoft Graph API.
+    """
+    def list(
+        self,
+        conditions: List[FilterCondition] = None,
+        limit: int = None,
+        sort: List[SortColumn] = None,
+        targets: List[str] = None,
+        **kwargs
+    ):
+        """
+        Executes a parsed SELECT SQL query on the 'chat messages' resource of the Microsoft Graph API.
+        Args:
+            conditions (List[FilterCondition]): The list of parsed filter conditions.
+            limit (int): The maximum number of records to return.
+            sort (List[SortColumn]): The list of parsed sort columns.
+            targets (List[str]): The list of target columns to return.
+        """
+        client: MSGraphAPITeamsDelegatedPermissionsClient = self.handler.connect()
+        messages = []
+        chat_id, message_ids = None, None
+        for condition in conditions:
+            if condition.column == "chatId":
+                if condition.op == FilterOperator.EQUAL:
+                    chat_id = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'chatId'."
+                    )
+                condition.applied = True
+            if condition.column == "id":
+                if condition.op == FilterOperator.EQUAL:
+                    message_ids = [condition.value]
+                elif condition.op == FilterOperator.IN:
+                    message_ids = condition.value
+                else:
+                    raise ValueError(
+                        f"Unsupported operator '{condition.op}' for column 'id'."
+                    )
+                condition.applied = True
+        if not chat_id:
+            raise ValueError("The 'chatId' column is required.")
+        if message_ids:
+            messages = client.get_messages_in_chat_by_ids(chat_id, message_ids)
+        else:
+            messages = client.get_all_messages_in_chat(chat_id, limit)
+        messages_df = pd.json_normalize(messages, sep="_")
+        messages_df = messages_df[self.get_columns()]
+        return messages_df
+    def get_columns(self) -> List[str]:
+        """
+        Retrieves the attributes (columns) of the 'chat messages' resource.
+        Returns:
+            List[Text]: A list of attributes (columns) of the 'chat messages' resource.
+        """
+        return [
+            "id",
+            "replyToId",
+            "etag",
+            "messageType",
+            "createdDateTime",
+            "lastModifiedDateTime",
+            "lastEditedDateTime",
+            "deletedDateTime",
+            "subject",
+            "summary",
+            "chatId",
+            "importance",
+            "locale",
+            "webUrl",
+            "policyViolation",
+            "from_application",
+            "from_device",
+            "from_user_id",
+            "from_user_displayName",
+            "from_user_userIdentityType",
+            "body_contentType",
+            "body_content",
+        ]

mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py CHANGED Viewed

@@ -114,13 +114,27 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         if conditions is None:
             return {}
-        return {
-            condition.column.split(".")[-1]: {
+        filter_conditions = {}
+        for condition in conditions:
+            parts = condition.column.split(".")
+            key = parts[0]
+            # converts 'col.el1.el2' to col->'el1'->>'el2'
+            if len(parts) > 1:
+                # intermediate elements
+                for el in parts[1:-1]:
+                    key += f" -> '{el}'"
+                # last element
+                key += f" ->> '{parts[-1]}'"
+            filter_conditions[key] = {
                 "op": condition.op.value,
                 "value": condition.value,
             }
-            for condition in conditions
-        }
+        return filter_conditions
     @staticmethod
     def _construct_where_clause(filter_conditions=None):

mindsdb/integrations/handlers/redshift_handler/redshift_handler.py CHANGED Viewed

@@ -57,6 +57,7 @@ class RedshiftHandler(PostgresHandler):
                 connection.commit()
             except Exception as e:
                 logger.error(f"Error inserting data into {table_name}, {e}!")
+                connection.rollback()
                 response = Response(
                     RESPONSE_TYPE.ERROR,
                     error_code=0,

mindsdb/integrations/handlers/salesforce_handler/requirements.txt CHANGED Viewed

	@@ -1 +1 @@
1	- salesforce_api
1	+ salesforce_api==0.1.45

mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py CHANGED Viewed

@@ -39,31 +39,8 @@ class SalesforceHandler(APIHandler):
         self.connection = None
         self.is_connected = False
-        # Register Salesforce tables.
-        self.resource_names = {
-            'Account',
-            'Contact',
-            'Opportunity',
-            'Lead',
-            'Task',
-            'Event',
-            'User',
-            'Product2',
-            'Pricebook2',
-            'PricebookEntry',
-            'Order',
-            'OrderItem',
-            'Case',
-            'Campaign',
-            'CampaignMember',
-            'Contract',
-            'Asset'
-        }
-        for resource_name in self.resource_names:
-            table_class = create_table_class(resource_name, resource_name)
-            self._register_table(resource_name, table_class(self))
+        self.thread_safe = True
+        self.resource_names = []
     def connect(self) -> salesforce_api.client.Client:
         """
@@ -92,6 +69,12 @@ class SalesforceHandler(APIHandler):
                 is_sandbox=self.connection_data.get('is_sandbox', False)
             )
             self.is_connected = True
+            # Register Salesforce tables.
+            for resource_name in self._get_resource_names():
+                table_class = create_table_class(resource_name)
+                self._register_table(resource_name.lower(), table_class(self))
             return self.connection
         except AuthenticationError as auth_error:
             logger.error(f"Authentication error connecting to Salesforce, {auth_error}!")
@@ -179,3 +162,15 @@ class SalesforceHandler(APIHandler):
             )
         return response
+    def _get_resource_names(self) -> list:
+        """
+        Retrieves the names of the Salesforce resources, caching them on the handler.
+        While `self.resource_names` is empty, the names are read from `self.connection.sobjects.describe()` and stored; otherwise the stored list is returned as is.
+        Returns:
+            list: The 'name' value of every entry under 'sobjects' in the describe result.
+        """
+        if not self.resource_names:
+            # NOTE(review): reads self.connection, so this presumably must run only after the connection is set - confirm against callers.
+            self.resource_names = [resource['name'] for resource in self.connection.sobjects.describe()['sobjects']]
+        return self.resource_names

mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py CHANGED Viewed

@@ -11,7 +11,7 @@ from mindsdb.utilities import log
 logger = log.getLogger(__name__)
-def create_table_class(table_name: Text, resource_name: Text) -> APIResource:
+def create_table_class(resource_name: Text) -> APIResource:
     """
     Creates a table class for the given Salesforce resource.
     """
@@ -31,7 +31,7 @@ def create_table_class(table_name: Text, resource_name: Text) -> APIResource:
             Returns:
                 pd.DataFrame: A DataFrame containing the data retrieved from the Salesforce resource.
             """
-            query.from_table = table_name
+            query.from_table = resource_name
             # SOQL does not support * in SELECT queries. Replace * with column names.
             if isinstance(query.targets[0], Star):

mindsdb/integrations/handlers/timescaledb_handler/timescaledb_handler.py CHANGED Viewed

@@ -10,8 +10,6 @@ class TimeScaleDBHandler(PostgresHandler):
         super().__init__(name, **kwargs)
 connection_args = OrderedDict(
     host={
         'type': ARG_TYPE.STR,
@@ -31,6 +29,12 @@ connection_args = OrderedDict(
         'type': ARG_TYPE.STR,
         'description': 'The password to authenticate the user with the TimeScaleDB server.'
     },
+    schema={
+        'type': ARG_TYPE.STR,
+        'description': 'The schema in which objects are searched first.',
+        'required': False,
+        'label': 'Schema'
+    },
     port={
         'type': ARG_TYPE.INT,
         'description': 'Specify port to connect TimeScaleDB '
@@ -39,8 +43,9 @@ connection_args = OrderedDict(
 connection_args_example = OrderedDict(
     host='127.0.0.1',
-    port=36806,
-    password='P455W0rD',
-    user='tsdbadmin',
-    database="tsdb"
+    port=5432,
+    password='password',
+    user='root',
+    database="timescaledb",
+    schema='public'
 )

mindsdb/integrations/libs/ml_handler_process/learn_process.py CHANGED Viewed

@@ -111,10 +111,16 @@ def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_da
             )
             handlers_cacher[predictor_record.id] = ml_handler
-            if not ml_handler.generative:
+            if not ml_handler.generative and target is not None:
                 if training_data_df is not None and target not in training_data_df.columns:
-                    raise Exception(
-                        f'Prediction target "{target}" not found in training dataframe: {list(training_data_df.columns)}')
+                    # is the case different? convert column case in input dataframe
+                    col_names = {c.lower(): c for c in training_data_df.columns}
+                    target_found = col_names.get(target.lower())
+                    if target_found:
+                        training_data_df.rename(columns={target_found: target}, inplace=True)
+                    else:
+                        raise Exception(
+                            f'Prediction target "{target}" not found in training dataframe: {list(training_data_df.columns)}')
             # create new model
             if base_model_id is None:

mindsdb/integrations/libs/vectordatabase_handler.py CHANGED Viewed

@@ -325,7 +325,7 @@ class VectorStoreHandler(BaseHandler):
         if not df_insert.empty:
             self.insert(table_name, df_insert)
-    def _dispatch_delete(self, query: Delete):
+    def dispatch_delete(self, query: Delete):
         """
         Dispatch delete query to the appropriate method.
         """
@@ -382,7 +382,7 @@ class VectorStoreHandler(BaseHandler):
             DropTables: self._dispatch_drop_table,
             Insert: self._dispatch_insert,
             Update: self._dispatch_update,
-            Delete: self._dispatch_delete,
+            Delete: self.dispatch_delete,
             Select: self.dispatch_select,
         }
         if type(query) in dispatch_router:

mindsdb/integrations/utilities/files/file_reader.py CHANGED Viewed

@@ -309,7 +309,7 @@ class FileReader(FormatDetector):
             )
         text = file_obj.read()
-        metadata = {"source": name}
+        metadata = {"source_file": name, "file_format": "txt"}
         documents = [Document(page_content=text, metadata=metadata)]
         text_splitter = RecursiveCharacterTextSplitter(
@@ -325,7 +325,7 @@ class FileReader(FormatDetector):
         )
     @staticmethod
-    def read_pdf(file_obj: BytesIO, **kwargs):
+    def read_pdf(file_obj: BytesIO, name=None, **kwargs):
         with fitz.open(stream=file_obj.read()) as pdf:  # open pdf
             text = chr(12).join([page.get_text() for page in pdf])
@@ -337,7 +337,7 @@ class FileReader(FormatDetector):
         split_text = text_splitter.split_text(text)
         return pd.DataFrame(
-            {"content": split_text, "metadata": [{}] * len(split_text)}
+            {"content": split_text, "metadata": [{"file_format": "pdf", "source_file": name}] * len(split_text)}
         )
     @staticmethod

MindsDB 25.3.2.0__py3-none-any.whl → 25.3.4.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.3.2.0__py3-none-any.whl → 25.3.4.0__py3-none-any.whl