MindsDB-25.1.2.0-py3-none-any.whl → MindsDB-25.1.5.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
This release has been flagged as potentially problematic.
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/METADATA +258 -255
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/RECORD +98 -85
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/WHEEL +1 -1
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +8 -3
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +9 -26
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/agents.py +3 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +49 -12
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +2 -2
- mindsdb/integrations/utilities/files/__init__.py +0 -0
- mindsdb/integrations/utilities/files/file_reader.py +258 -0
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
- mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +84 -20
- mindsdb/integrations/utilities/rag/rag_pipeline_builder.py +16 -1
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/__init__.py +3 -0
- mindsdb/integrations/utilities/rag/retrievers/multi_hop_retriever.py +85 -0
- mindsdb/integrations/utilities/rag/retrievers/retriever_factory.py +57 -0
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +117 -48
- mindsdb/integrations/utilities/rag/settings.py +190 -17
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +18 -8
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/langchain_agent.py +124 -157
- mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +21 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/integrations.py +5 -1
- mindsdb/interfaces/database/projects.py +55 -16
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +38 -9
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +51 -40
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -54
- mindsdb/interfaces/skills/skills_controller.py +7 -3
- mindsdb/interfaces/skills/sql_agent.py +127 -41
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +279 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.0.dist-info → MindsDB-25.1.5.0.dist-info}/top_level.txt +0 -0
Selected hunks, grouped by file:

mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py

@@ -1,8 +1,10 @@
+import ast
 from typing import List, Optional

-import
+import numpy as np
+from pinecone import Pinecone, ServerlessSpec
+from pinecone.core.openapi.shared.exceptions import NotFoundException, PineconeApiException
 import pandas as pd
-import ast

 from mindsdb.integrations.libs.response import RESPONSE_TYPE
 from mindsdb.integrations.libs.response import HandlerResponse
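The new imports track the rewritten pinecone-client API that this handler now targets (pinned to 5.0.1 in requirements.txt below). For orientation, a minimal standalone sketch of that client surface, using only calls that appear in this diff; the index name, dimension, and vectors are illustrative, not taken from the package:

    from pinecone import Pinecone, ServerlessSpec

    pc = Pinecone(api_key="YOUR_API_KEY")  # replaces the old pinecone.init(...) entry point
    pc.create_index(
        name="my-index",           # illustrative name
        dimension=8,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    index = pc.Index("my-index")
    index.upsert(vectors=[{"id": "a", "values": [0.1] * 8, "metadata": {"k": 1}}])
    result = index.query(vector=[0.1] * 8, top_k=1, include_values=True, include_metadata=True)
    pc.delete_index("my-index")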
@@ -18,32 +20,30 @@ from mindsdb.utilities import log

 logger = log.getLogger(__name__)

+DEFAULT_CREATE_TABLE_PARAMS = {
+    "dimension": 8,
+    "metric": "cosine",
+    "spec": {
+        "cloud": "aws",
+        "region": "us-east-1"
+    }
+}
+MAX_FETCH_LIMIT = 10000
+UPSERT_BATCH_SIZE = 99  # API reccomendation
+

 class PineconeHandler(VectorStoreHandler):
     """This handler handles connection and execution of the Pinecone statements."""

     name = "pinecone"

-    def __init__(self, name: str, **kwargs):
+    def __init__(self, name: str, connection_data: dict, **kwargs):
         super().__init__(name)
-        self.
-        self.
-
-
-            "environment": self._connection_data.get("environment")
-        }
-        self._table_create_params = {
-            "dimension": 8,
-            "metric": "cosine",
-            "pods": 1,
-            "replicas": 1,
-            "pod_type": 'p1',
-        }
-        for key in self._table_create_params:
-            if key in self._connection_data:
-                self._table_create_params[key] = self._connection_data[key]
+        self.connection_data = connection_data
+        self.kwargs = kwargs
+
+        self.connection = None
         self.is_connected = False
-        self.connect()

     def __del__(self):
         if self.is_connected is True:
@@ -51,7 +51,8 @@ class PineconeHandler(VectorStoreHandler):

     def _get_index_handle(self, index_name):
         """Returns handler to index specified by `index_name`"""
-
+        connection = self.connect()
+        index = connection.Index(index_name)
         try:
             index.describe_index_stats()
         except Exception:
@@ -135,10 +136,15 @@ class PineconeHandler(VectorStoreHandler):

     def connect(self):
         """Connect to a pinecone database."""
+        if self.is_connected is True:
+            return self.connection
+
+        if 'api_key' not in self.connection_data:
+            raise ValueError('Required parameter (api_key) must be provided.')
+
         try:
-
-
-            self.is_connected = True
+            self.connection = Pinecone(api_key=self.connection_data['api_key'])
+            return self.connection
         except Exception as e:
             logger.error(f"Error connecting to Pinecone client, {e}!")
             self.is_connected = False
@@ -147,55 +153,99 @@ class PineconeHandler(VectorStoreHandler):
         """Close the pinecone connection."""
         if self.is_connected is False:
             return
-
+        self.connection = None
         self.is_connected = False

     def check_connection(self):
         """Check the connection to pinecone."""
-
+        response = StatusResponse(False)
+        need_to_close = self.is_connected is False
+
         try:
-
-
+            connection = self.connect()
+            connection.list_indexes()
+            response.success = True
         except Exception as e:
             logger.error(f"Error connecting to pinecone , {e}!")
-
-
+            response.error_message = str(e)
+
+        if response.success is True and need_to_close:
+            self.disconnect()
+        if response.success is False and self.is_connected is True:
+            self.is_connected = False
+
+        return response

     def get_tables(self) -> HandlerResponse:
         """Get the list of indexes in the pinecone database."""
-
-
-
-
+        connection = self.connect()
+        indexes = connection.list_indexes()
+        df = pd.DataFrame(
+            columns=["table_name"],
+            data=[index['name'] for index in indexes],
         )
-        return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=
+        return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=df)

     def create_table(self, table_name: str, if_not_exists=True):
         """Create an index with the given name in the Pinecone database."""
-
+        connection = self.connect()
+
+        # TODO: Should other parameters be supported? Pod indexes?
+        # TODO: Should there be a better way to provide these parameters rather than when establishing the connection?
+        create_table_params = {}
+        for key, val in DEFAULT_CREATE_TABLE_PARAMS.items():
+            if key in self.connection_data:
+                create_table_params[key] = self.connection_data[key]
+            else:
+                create_table_params[key] = val
+
+        create_table_params["spec"] = ServerlessSpec(**create_table_params["spec"])
+
+        try:
+            connection.create_index(name=table_name, **create_table_params)
+        except PineconeApiException as pinecone_error:
+            if pinecone_error.status == 409 and if_not_exists:
+                return
+            raise Exception(f"Error creating index '{table_name}': {pinecone_error}")

-    def insert(self, table_name: str, data: pd.DataFrame
+    def insert(self, table_name: str, data: pd.DataFrame):
         """Insert data into pinecone index passed in through `table_name` parameter."""
-        upsert_batch_size = 99  # API reccomendation
         index = self._get_index_handle(table_name)
         if index is None:
             raise Exception(f"Error getting index '{table_name}', are you sure the name is correct?")

         data.rename(columns={
             TableField.ID.value: "id",
-            TableField.EMBEDDINGS.value: "values",
-            TableField.METADATA.value: "metadata"},
+            TableField.EMBEDDINGS.value: "values"},
             inplace=True)
-        data = data[["id", "values", "metadata"]]

-
+        columns = ["id", "values"]
+
+        if TableField.METADATA.value in data.columns:
+            data.rename(columns={TableField.METADATA.value: "metadata"}, inplace=True)
+            # fill None and NaN values with empty dict
+            if data['metadata'].isnull().any():
+                data['metadata'] = data['metadata'].apply(lambda x: {} if x is None or (isinstance(x, float) and np.isnan(x)) else x)
+            columns.append("metadata")
+
+        data = data[columns]
+
+        # convert the embeddings to lists if they are strings
+        data["values"] = data["values"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
+
+        for chunk in (data[pos:pos + UPSERT_BATCH_SIZE] for pos in range(0, len(data), UPSERT_BATCH_SIZE)):
             chunk = chunk.to_dict(orient="records")
             index.upsert(vectors=chunk)

     def drop_table(self, table_name: str, if_exists=True):
         """Delete an index passed in through `table_name` from the pinecone ."""
-
-
+        connection = self.connect()
+        try:
+            connection.delete_index(table_name)
+        except NotFoundException:
+            if if_exists:
+                return
+            raise Exception(f"Error deleting index '{table_name}', are you sure the name is correct?")

     def delete(self, table_name: str, conditions: List[FilterCondition] = None):
         """Delete records in pinecone index `table_name` based on ids or based on metadata conditions."""
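The insert() rewrite makes metadata optional and tolerates both stringified embeddings and per-row missing metadata. A small self-contained illustration of the two normalization steps, with made-up rows:

    import ast

    import numpy as np
    import pandas as pd

    data = pd.DataFrame({
        "id": ["a", "b"],
        "values": ["[0.1, 0.2]", [0.3, 0.4]],  # embeddings sometimes arrive as strings
        "metadata": [{"k": 1}, np.nan],        # metadata may be None/NaN per row
    })

    # fill None and NaN metadata with an empty dict, as the handler now does
    data["metadata"] = data["metadata"].apply(
        lambda x: {} if x is None or (isinstance(x, float) and np.isnan(x)) else x)

    # convert stringified embeddings back into lists
    data["values"] = data["values"].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

    print(data.to_dict(orient="records"))
    # [{'id': 'a', 'values': [0.1, 0.2], 'metadata': {'k': 1}},
    #  {'id': 'b', 'values': [0.3, 0.4], 'metadata': {}}]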
@@ -225,6 +275,7 @@ class PineconeHandler(VectorStoreHandler):
         limit: int = None,
     ):
         """Run query on pinecone index named `table_name` and get results."""
+        # TODO: Add support for namespaces.
         index = self._get_index_handle(table_name)
         if index is None:
             raise Exception(f"Error getting index '{table_name}', are you sure the name is correct?")

@@ -233,23 +284,28 @@ class PineconeHandler(VectorStoreHandler):
             "include_values": True,
             "include_metadata": True
         }
+
         # check for metadata filter
         metadata_filters = self._translate_metadata_condition(conditions)
-
-
-
-
-
-
-
-
-
-
-
-
+        if metadata_filters is not None:
+            query["filter"] = metadata_filters
+
+        # check for vector and id filters
+        vector_filters = []
+        id_filters = []
+
+        if conditions:
+            for condition in conditions:
+                if condition.column == TableField.SEARCH_VECTOR.value:
+                    vector_filters.append(condition.value)
+                elif condition.column == TableField.ID.value:
+                    id_filters.append(condition.value)
+
+        if vector_filters:
+            if len(vector_filters) > 1:
                 raise Exception("You cannot have multiple search_vectors in query")

-            query["vector"] =
+            query["vector"] = vector_filters[0]
             # For subqueries, the vector filter is a list of list of strings
             if isinstance(query["vector"], list) and isinstance(query["vector"][0], str):
                 if len(query["vector"]) > 1:

@@ -260,26 +316,21 @@ class PineconeHandler(VectorStoreHandler):
                 except Exception as e:
                     raise Exception(f"Cannot parse the search vector '{query['vector']}'into a list: {e}")

-        # check for limit
-        if limit is not None:
-            query["top_k"] = limit
-        else:
-            query["top_k"] = self.MAX_FETCH_LIMIT
-        if metadata_filters is not None:
-            query["filter"] = metadata_filters
-        # check for id filter
-        id_filters = None
-        if conditions is not None:
-            id_filters = [
-                condition.value
-                for condition in conditions
-                if condition.column == TableField.ID.value
-            ] or None
         if id_filters:
             if len(id_filters) > 1:
                 raise Exception("You cannot have multiple IDs in query")

             query["id"] = id_filters[0]
+
+        if not vector_filters and not id_filters:
+            raise Exception("You must provide either a search_vector or an ID in the query")
+
+        # check for limit
+        if limit is not None:
+            query["top_k"] = limit
+        else:
+            query["top_k"] = MAX_FETCH_LIMIT
+
         # exec query
         try:
             result = index.query(**query)
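After the rewrite, query() builds one kwargs dict and requires either a search vector or an ID. For a vector search with a metadata filter and a limit, the dict handed to index.query(...) looks roughly like the sketch below; the values are illustrative, and the filter uses Pinecone's documented operator syntax since the exact output of _translate_metadata_condition is not shown in this diff:

    query = {
        "include_values": True,
        "include_metadata": True,
        "vector": [0.1, 0.2, 0.3],              # from a search_vector condition (illustrative)
        "filter": {"genre": {"$eq": "drama"}},  # from metadata conditions, if any (illustrative)
        "top_k": 5,                             # limit, else MAX_FETCH_LIMIT (10000)
    }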
mindsdb/integrations/handlers/pinecone_handler/requirements.txt

@@ -1 +1 @@
-pinecone-client
+pinecone-client==5.0.1
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py

@@ -1,5 +1,6 @@
 import time
 import json
+from typing import Optional

 import pandas as pd
 import psycopg
@@ -161,7 +162,7 @@ class PostgresHandler(DatabaseHandler):
             'float8': 'float64'
         }
         columns = df.columns
-        df =
+        df.columns = list(range(len(columns)))
         for column_index, column_name in enumerate(df.columns):
             col = df[column_name]
             if str(col.dtype) == 'object':

@@ -172,7 +173,7 @@ class PostgresHandler(DatabaseHandler):
                     df[column_name] = col.astype(types_map[pg_type.name])
                 except ValueError as e:
                     logger.error(f'Error casting column {col.name} to {types_map[pg_type.name]}: {e}')
-
+        df.columns = columns

     @profiler.profile()
     def native_query(self, query: str, params=None) -> Response:
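The _cast_dtypes change swaps the column labels for positional integers before casting and restores them afterwards. A plausible motivation (an assumption on my part, not stated in the diff) is result sets with duplicate column names, where label-based access returns a DataFrame instead of a Series. A minimal repro of the pattern:

    import pandas as pd

    # e.g. SELECT 1 AS x, '2' AS x yields duplicate labels
    df = pd.DataFrame([[1, "2"]], columns=["x", "x"])

    columns = df.columns
    df.columns = list(range(len(columns)))  # temporary positional labels
    for column_name in df.columns:
        df[column_name] = pd.to_numeric(df[column_name])  # each access is now a single Series
    df.columns = columns  # restore the original labels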
@@ -202,7 +203,7 @@ class PostgresHandler(DatabaseHandler):
                         result,
                         columns=[x.name for x in cur.description]
                     )
-
+                    self._cast_dtypes(df, cur.description)
                     response = Response(
                         RESPONSE_TYPE.TABLE,
                         df

@@ -281,21 +282,27 @@ class PostgresHandler(DatabaseHandler):
         """
         return self.native_query(query)

-    def get_columns(self, table_name: str) -> Response:
+    def get_columns(self, table_name: str, schema_name: Optional[str] = None) -> Response:
         """
         Retrieves column details for a specified table in the PostgreSQL database.

         Args:
             table_name (str): The name of the table for which to retrieve column information.
+            schema_name (str): The name of the schema in which the table is located.

         Returns:
             Response: A response object containing the column details, formatted as per the `Response` class.
+
         Raises:
             ValueError: If the 'table_name' is not a valid string.
         """

         if not table_name or not isinstance(table_name, str):
             raise ValueError("Invalid table name provided.")
+        if isinstance(schema_name, str):
+            schema_name = f"'{schema_name}'"
+        else:
+            schema_name = 'current_schema()'
         query = f"""
             SELECT
                 column_name as "Field",

@@ -305,12 +312,11 @@ class PostgresHandler(DatabaseHandler):
             WHERE
                 table_name = '{table_name}'
             AND
-                table_schema =
+                table_schema = {schema_name}
         """
         return self.native_query(query)

     def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs):
-
         config = self._make_connection_args()
         config['autocommit'] = True

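For get_columns, a None schema falls back to current_schema() while a string is wrapped in single quotes before being interpolated into the information_schema query. A standalone reduction of just that branch (not the handler itself):

    def table_schema_clause(schema_name=None):
        # mirrors the new get_columns() branching
        if isinstance(schema_name, str):
            schema_name = f"'{schema_name}'"
        else:
            schema_name = "current_schema()"
        return f"table_schema = {schema_name}"

    print(table_schema_clause())          # table_schema = current_schema()
    print(table_schema_clause("sales"))   # table_schema = 'sales'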
mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py

@@ -12,7 +12,7 @@ class RayServeHandler(BaseMLEngine):
     - A Ray Serve server should be running

     Example:
-
+
     """  # noqa
     name = 'ray_serve'

@@ -42,9 +42,11 @@ class RayServeHandler(BaseMLEngine):
             raise Exception("Error: Training failed: " + resp['status'])

     def predict(self, df, args=None):
-        args = self.model_storage.json_get('args')  #
+        args = {**(self.model_storage.json_get('args')), **args}  # merge incoming args
+        pred_args = args.get('predict_params', {})
+        args = {**args, **pred_args}  # merge pred_args
         resp = requests.post(args['predict_url'],
-                             json={'df': df.to_json(orient='records')},
+                             json={'df': df.to_json(orient='records'), 'pred_args': pred_args},
                              headers={'content-type': 'application/json; format=pandas-records'})
         response = resp.json()

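Callers can now pass predict-time parameters, which the handler forwards to the model server as a pred_args key next to df. A hedged sketch of a matching server-side endpoint (FastAPI shown for brevity; the route, the response shape, and everything other than the df/pred_args fields are illustrative, not part of the package):

    import io

    import pandas as pd
    from fastapi import FastAPI, Request

    app = FastAPI()

    @app.post("/my_model/predict")  # hypothetical route
    async def predict(request: Request):
        body = await request.json()
        df = pd.read_json(io.StringIO(body["df"]), orient="records")
        pred_args = body.get("pred_args", {})  # new in the 25.1.5.0 payload
        # a real server would run its model on df here, using pred_args
        return {"prediction": [0.0] * len(df), "used_args": pred_args}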
mindsdb/integrations/handlers/slack_handler/slack_handler.py

@@ -231,6 +231,9 @@ class SlackHandler(APIChatHandler):
             'polling': {
                 'type': 'realtime',
             },
+            'memory': {
+                'type': 'handler',
+            },
             'tables': [
                 {
                     'chat_table': {

@@ -238,7 +241,7 @@ class SlackHandler(APIChatHandler):
                         'chat_id_col': 'channel_id',
                         'username_col': 'user',
                         'text_col': 'text',
-                        'time_col': '
+                        'time_col': 'created_at',
                     }
                 },
                 {

@@ -264,7 +267,7 @@ class SlackHandler(APIChatHandler):
         user_info = web_connection.auth_test().data
         return user_info['bot_id']

-    def subscribe(self, stop_event: threading.Event, callback: Callable, **kwargs: Any) -> None:
+    def subscribe(self, stop_event: threading.Event, callback: Callable, table_name: Text, columns: List = None, **kwargs: Any) -> None:
         """
         Subscribes to the Slack API using the Socket Mode for real-time responses to messages.

@@ -274,6 +277,14 @@ class SlackHandler(APIChatHandler):
             table_name (Text): The name of the table to subscribe to.
             kwargs: Arbitrary keyword arguments.
         """
+        if table_name not in ['messages', 'threads']:
+            raise RuntimeError(f'Table {table_name} is not supported for subscription.')
+
+        # Raise an error if columns are provided.
+        # Since Slack subscriptions depend on events and not changes to the virtual tables, columns are not supported.
+        if columns:
+            raise RuntimeError('Columns are not supported for Slack subscriptions.')
+
         self._socket_connection = SocketModeClient(
             # This app-level token will be used only for establishing a connection.
             app_token=self.connection_data['app_token'],  # xapp-A111-222-xyz
mindsdb/integrations/handlers/slack_handler/slack_tables.py

@@ -6,7 +6,7 @@ import pandas as pd
 from slack_sdk.errors import SlackApiError

 from mindsdb.integrations.libs.api_handler import APIResource
-from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, FilterCondition, FilterOperator
+from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, FilterCondition, FilterOperator, SortColumn
 from mindsdb.utilities import log

 logger = log.getLogger(__name__)

@@ -203,6 +203,7 @@ class SlackMessagesTable(APIResource):
         self,
         conditions: List[FilterCondition] = None,
         limit: int = None,
+        sort: List[SortColumn] = None,
         **kwargs: Any
     ) -> pd.DataFrame:
         """

@@ -222,6 +223,7 @@ class SlackMessagesTable(APIResource):
         Args:
             conditions (List[FilterCondition]): The conditions to filter the messages.
             limit (int): The limit of the messages to return.
+            sort (List[SortColumn]): The columns to sort the messages by.
             kwargs (Any): Arbitrary keyword arguments.

         Raises:

@@ -306,6 +308,14 @@ class SlackMessagesTable(APIResource):
         # Translate the time stamp into a 'created_at' field.
         result['created_at'] = pd.to_datetime(result['ts'].astype(float), unit='s').dt.strftime('%Y-%m-%d %H:%M:%S')

+        # Sort the messages by the specified columns.
+        if sort:
+            result.sort_values(
+                by=[col.column for col in sort],
+                ascending=[col.ascending for col in sort],
+                inplace=True
+            )
+
         return result

     def insert(self, query: Insert):

@@ -496,6 +506,7 @@ class SlackThreadsTable(APIResource):
         self,
         conditions: List[FilterCondition] = None,
         limit: int = None,
+        sort: List[SortColumn] = None,
         **kwargs: Any
     ) -> pd.DataFrame:
         """

@@ -514,6 +525,7 @@ class SlackThreadsTable(APIResource):
         Args:
             conditions (List[FilterCondition]): The conditions to filter the messages.
             limit (int): The limit of the messages to return.
+            sort (List[SortColumn]): The columns to sort the messages by.
             kwargs (Any): Arbitrary keyword arguments.

         Raises:

@@ -591,6 +603,14 @@ class SlackThreadsTable(APIResource):
         result['channel_id'] = params['channel']
         result['channel_name'] = channel['name'] if 'name' in channel else None

+        # Sort the messages by the specified columns.
+        if sort:
+            result.sort_values(
+                by=[col.column for col in sort],
+                ascending=[col.ascending for col in sort],
+                inplace=True
+            )
+
         return result

     def insert(self, query: Insert):
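Both Slack tables now accept a sort argument and apply it with DataFrame.sort_values. A self-contained illustration, using a stand-in dataclass for mindsdb.integrations.utilities.sql_utils.SortColumn (only the column/ascending attributes used in the diff are assumed):

    from dataclasses import dataclass

    import pandas as pd

    @dataclass
    class SortColumn:  # stand-in for the real class
        column: str
        ascending: bool = True

    result = pd.DataFrame({
        "created_at": ["2025-01-01 10:00:00", "2025-01-02 09:30:00"],
        "text": ["first", "second"],
    })
    sort = [SortColumn("created_at", ascending=False)]

    result.sort_values(
        by=[col.column for col in sort],
        ascending=[col.ascending for col in sort],
        inplace=True,
    )
    print(result.iloc[0]["text"])  # second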
mindsdb/integrations/libs/ml_handler_process/learn_process.py

@@ -8,7 +8,7 @@ from sqlalchemy.orm.attributes import flag_modified
 from mindsdb_sql_parser import parse_sql
 from mindsdb_sql_parser.ast import Identifier, Select, Star, NativeQuery

-from mindsdb.api.executor import SQLQuery
+from mindsdb.api.executor.sql_query import SQLQuery
 import mindsdb.utilities.profiler as profiler
 from mindsdb.utilities.functions import mark_process
 from mindsdb.utilities.config import Config

@@ -72,7 +72,7 @@ def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_data_query
     elif data_integration_ref['type'] == 'view':
         project = database_controller.get_project(project_name)
         query_ast = parse_sql(fetch_data_query)
-        view_meta = project.
+        view_meta = project.get_view_meta(query_ast)
         sqlquery = SQLQuery(view_meta['query_ast'], session=sql_session)
     elif data_integration_ref['type'] == 'project':
         query_ast = parse_sql(fetch_data_query)