MindsDB 25.9.1.2__py3-none-any.whl → 25.9.2.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
  __title__ = "MindsDB"
  __package_name__ = "mindsdb"
- __version__ = "25.9.1.2"
+ __version__ = "25.9.2.0a1"
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
  __email__ = "jorge@mindsdb.com"
  __author__ = "MindsDB Inc"
@@ -17,7 +17,6 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
      TableField,
      VectorStoreHandler,
  )
- from mindsdb.interfaces.storage.model_fs import HandlerStorage
  from mindsdb.utilities import log

  logger = log.getLogger(__name__)
@@ -55,7 +54,7 @@ class ChromaDBHandler(VectorStoreHandler):

      def __init__(self, name: str, **kwargs):
          super().__init__(name)
-         self.handler_storage = HandlerStorage(kwargs.get("integration_id"))
+         self.handler_storage = kwargs["handler_storage"]
          self._client = None
          self.persist_directory = None
          self.is_connected = False
@@ -73,8 +72,6 @@ class ChromaDBHandler(VectorStoreHandler):
              "hnsw:space": config.distance,
          }

-         self.connect()
-
      def validate_connection_parameters(self, name, **kwargs):
          """
          Validate the connection parameters.
@@ -88,7 +85,7 @@ class ChromaDBHandler(VectorStoreHandler):
          if config.persist_directory:
              if os.path.isabs(config.persist_directory):
                  self.persist_directory = config.persist_directory
-             elif not self.handler_storage.is_temporal:
+             else:
                  # get full persistence directory from handler storage
                  self.persist_directory = self.handler_storage.folder_get(config.persist_directory)
                  self._use_handler_storage = True
@@ -149,6 +146,7 @@ class ChromaDBHandler(VectorStoreHandler):
          need_to_close = self.is_connected is False

          try:
+             self.connect()
              self._client.heartbeat()
              response_code.success = True
          except Exception as e:
@@ -233,6 +231,7 @@ class ChromaDBHandler(VectorStoreHandler):
          offset: int = None,
          limit: int = None,
      ) -> pd.DataFrame:
+         self.connect()
          collection = self._client.get_collection(table_name)
          filters = self._translate_metadata_condition(conditions)

@@ -399,6 +398,7 @@ class ChromaDBHandler(VectorStoreHandler):
          Insert/Upsert data into ChromaDB collection.
          If records with same IDs exist, they will be updated.
          """
+         self.connect()
          collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata)

          # Convert metadata from string to dict if needed
@@ -449,6 +449,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Update data in the ChromaDB database.
          """
+         self.connect()
          collection = self._client.get_collection(table_name)

          # drop columns with all None values
@@ -466,6 +467,7 @@ class ChromaDBHandler(VectorStoreHandler):
          self._sync()

      def delete(self, table_name: str, conditions: List[FilterCondition] = None):
+         self.connect()
          filters = self._translate_metadata_condition(conditions)
          # get id filters
          id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] or None
@@ -480,6 +482,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Create a collection with the given name in the ChromaDB database.
          """
+         self.connect()
          self._client.create_collection(
              table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata
          )
@@ -489,6 +492,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Delete a collection from the ChromaDB database.
          """
+         self.connect()
          try:
              self._client.delete_collection(table_name)
              self._sync()
@@ -502,6 +506,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Get the list of collections in the ChromaDB database.
          """
+         self.connect()
          collections = self._client.list_collections()
          collections_name = pd.DataFrame(
              columns=["table_name"],
@@ -511,6 +516,7 @@ class ChromaDBHandler(VectorStoreHandler):

      def get_columns(self, table_name: str) -> HandlerResponse:
          # check if collection exists
+         self.connect()
          try:
              _ = self._client.get_collection(table_name)
          except ValueError:
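
The ChromaDB hunks above drop the eager `self.connect()` from `__init__` and instead call `self.connect()` at the top of every operation (check_connection, select, insert/upsert, update, delete, create_table, drop_table, get_tables, get_columns). A minimal sketch of that lazy-connect pattern, assuming chromadb >= 0.4's `PersistentClient` API; the class below is illustrative, not MindsDB code:

```python
# Illustrative lazy-connect sketch (not MindsDB code); assumes chromadb >= 0.4.
import chromadb


class LazyChromaClient:
    def __init__(self, persist_directory: str):
        # No connection is opened here; construction stays cheap and side-effect free.
        self.persist_directory = persist_directory
        self._client = None

    def connect(self):
        # Idempotent: build a client on first use, reuse it afterwards.
        if self._client is None:
            self._client = chromadb.PersistentClient(path=self.persist_directory)
        return self._client

    def get_tables(self) -> list[str]:
        self.connect()  # every public operation ensures a client first
        return [c.name for c in self._client.list_collections()]
```
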
@@ -114,7 +114,7 @@ class OpenAIHandler(BaseMLEngine):
          except NotFoundError:
              pass
          except AuthenticationError as e:
-             if e.body["code"] == "invalid_api_key":
+             if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key":
                  raise Exception("Invalid api key")
              raise Exception(f"Something went wrong: {e}")

@@ -17,7 +17,9 @@ from mindsdb_sql_parser.ast import (
      Delete,
      Update,
      Function,
+     DropTables,
  )
+ from mindsdb_sql_parser.ast.base import ASTNode
  from pgvector.psycopg import register_vector

  from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -116,9 +118,22 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
              return Response(RESPONSE_TYPE.OK)
          return super().get_tables()

-     def native_query(self, query, params=None) -> Response:
+     def query(self, query: ASTNode) -> Response:
+         # Option to drop table of shared pgvector connection
+         if isinstance(query, DropTables):
+             query.tables = [self._check_table(table.parts[-1]) for table in query.tables]
+             query_str, params = self.renderer.get_exec_params(query, with_failback=True)
+             return self.native_query(query_str, params, no_restrict=True)
+         return super().query(query)
+
+     def native_query(self, query, params=None, no_restrict=False) -> Response:
+         """
+         Altered `native_query` method of postgres handler.
+         Restrict usage of native query from executor with shared pg vector connection
+         Exceptions: if it is used by pgvector itself (with no_restrict = True)
+         """
          # Prevent execute native queries
-         if self._is_shared_db:
+         if self._is_shared_db and not no_restrict:
              return Response(RESPONSE_TYPE.OK)
          return super().native_query(query, params=params)

@@ -550,6 +565,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):

      def create_table(self, table_name: str):
          """Create a table with a vector column."""
+
+         table_name = self._check_table(table_name)
+
          with self.connection.cursor() as cur:
              # For sparse vectors, use sparsevec type
              vector_column_type = "sparsevec" if self._is_sparse else "vector"
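
Taken together, the pgvector hunks route `DROP TABLE` on a shared pgvector connection through the new `query()` override, while `native_query()` refuses raw SQL on a shared database unless the internal `no_restrict=True` escape hatch is used (the PostgresHandler change further below passes exactly that flag, and its base signature gains `**kwargs` so the call works whichever override it resolves to). A stripped-down sketch of the gating pattern with illustrative class names:

```python
# Hedged sketch of the no_restrict gating pattern; class names are illustrative.
class BaseSQLHandler:
    def native_query(self, query, params=None, **kwargs):
        # The base accepts **kwargs so internal callers may pass no_restrict
        # regardless of which subclass the call resolves to.
        return f"executed: {query}"


class SharedVectorHandler(BaseSQLHandler):
    def __init__(self, is_shared_db: bool):
        self._is_shared_db = is_shared_db

    def native_query(self, query, params=None, no_restrict=False):
        # User-issued raw SQL is refused on the shared connection;
        # internal calls opt out with no_restrict=True.
        if self._is_shared_db and not no_restrict:
            return "skipped"
        return super().native_query(query, params=params)


handler = SharedVectorHandler(is_shared_db=True)
print(handler.native_query("DROP TABLE docs"))                    # skipped
print(handler.native_query("DROP TABLE docs", no_restrict=True))  # executed: DROP TABLE docs
```
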
@@ -279,7 +279,7 @@ class PostgresHandler(MetaDatabaseHandler):
          df.columns = columns

      @profiler.profile()
-     def native_query(self, query: str, params=None) -> Response:
+     def native_query(self, query: str, params=None, **kwargs) -> Response:
          """
          Executes a SQL query on the PostgreSQL database and returns the result.

@@ -466,7 +466,10 @@ class PostgresHandler(MetaDatabaseHandler):
              AND
                  table_schema = {schema_name}
          """
-         result = self.native_query(query)
+         # If it is used by pgvector handler - `native_query` method of pgvector handler will be used
+         # in that case if shared pgvector db is used - `native_query` will be skipped (return empty result)
+         # `no_restrict` flag allows to execute native query, and it will call `native_query` of postgres handler
+         result = self.native_query(query, no_restrict=True)
          result.to_columns_table_response(map_type_fn=_map_type)
          return result

@@ -547,7 +547,7 @@ class KnowledgeBaseTable:
          if processed_chunks:
              content.value = processed_chunks[0].content

-         query.update_columns[emb_col] = Constant(self._content_to_embeddings(content))
+         query.update_columns[emb_col] = Constant(self._content_to_embeddings(content.value))

          if "metadata" not in query.update_columns:
              query.update_columns["metadata"] = Constant({})
@@ -1110,6 +1110,9 @@ class KnowledgeBaseController:
          model_record = db.Predictor.query.get(model["id"])
          embedding_model_id = model_record.id

+         if model_record.learn_args.get("using", {}).get("sparse"):
+             is_sparse = True
+
          # if params.get("reranking_model", {}) is bool and False we evaluate it to empty dictionary
          reranking_model_params = params.get("reranking_model", {})

@@ -1138,7 +1141,6 @@ class KnowledgeBaseController:
          # Add sparse vector support for pgvector
          vector_db_params = {}
          # Check both explicit parameter and model configuration
-         is_sparse = is_sparse or model_record.learn_args.get("using", {}).get("sparse")
          if is_sparse:
              vector_db_params["is_sparse"] = True
          if vector_size is not None:
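
The two knowledge-base hunks move the sparse-embedding check ahead of the vector-store parameter assembly, so `is_sparse` is already known when `vector_db_params` is built and, downstream, when pgvector's `create_table` picks `sparsevec` over `vector`. A small sketch of the same lookup; the `learn_args` value is made up:

```python
# Illustrative only: learn_args mimics the structure read from the model record.
learn_args = {"using": {"sparse": True, "model_name": "my-sparse-embedder"}}

is_sparse = False
if learn_args.get("using", {}).get("sparse"):
    is_sparse = True

vector_db_params = {}
if is_sparse:
    vector_db_params["is_sparse"] = True  # later selects the sparsevec column type

print(vector_db_params)  # {'is_sparse': True}
```
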
@@ -11,18 +11,16 @@ from .fs import RESOURCE_GROUP, FileStorageFactory, SERVICE_FILES_NAMES
  from .json import get_json_storage, get_encrypted_json_storage


- JSON_STORAGE_FILE = 'json_storage.json'
+ JSON_STORAGE_FILE = "json_storage.json"


  class ModelStorage:
      """
      This class deals with all model-related storage requirements, from setting status to storing artifacts.
      """
+
      def __init__(self, predictor_id):
-         storageFactory = FileStorageFactory(
-             resource_group=RESOURCE_GROUP.PREDICTOR,
-             sync=True
-         )
+         storageFactory = FileStorageFactory(resource_group=RESOURCE_GROUP.PREDICTOR, sync=True)
          self.fileStorage = storageFactory(predictor_id)
          self.predictor_id = predictor_id

@@ -43,15 +41,12 @@ class ModelStorage:
          """
          model_record = db.Predictor.query.get(self.predictor_id)
          if check_exists is True and model_record is None:
-             raise KeyError('Model does not exists')
+             raise KeyError("Model does not exists")
          return model_record

      def get_info(self):
          rec = self._get_model_record(self.predictor_id)
-         return dict(status=rec.status,
-                     to_predict=rec.to_predict,
-                     data=rec.data,
-                     learn_args=rec.learn_args)
+         return dict(status=rec.status, to_predict=rec.to_predict, data=rec.data, learn_args=rec.learn_args)

      def status_set(self, status, status_info=None):
          rec = self._get_model_record(self.predictor_id)
@@ -95,67 +90,52 @@ class ModelStorage:

      def folder_get(self, name):
          # pull folder and return path
-         name = name.lower().replace(' ', '_')
-         name = re.sub(r'([^a-z^A-Z^_\d]+)', '_', name)
+         name = name.lower().replace(" ", "_")
+         name = re.sub(r"([^a-z^A-Z^_\d]+)", "_", name)

          self.fileStorage.pull_path(name)
          return str(self.fileStorage.get_path(name))

      def folder_sync(self, name):
          # sync abs path
-         name = name.lower().replace(' ', '_')
-         name = re.sub(r'([^a-z^A-Z^_\d]+)', '_', name)
+         name = name.lower().replace(" ", "_")
+         name = re.sub(r"([^a-z^A-Z^_\d]+)", "_", name)

          self.fileStorage.push_path(name)

-     def file_list(self):
-         ...
+     def file_list(self): ...

-     def file_del(self, name):
-         ...
+     def file_del(self, name): ...

      # jsons

      def json_set(self, name, data):
-         json_storage = get_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
-         )
+         json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR)
          return json_storage.set(name, data)

      def encrypted_json_set(self, name: str, data: dict) -> None:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
+             resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR
          )
          return json_storage.set(name, data)

      def json_get(self, name):
-         json_storage = get_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
-         )
+         json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR)
          return json_storage.get(name)

      def encrypted_json_get(self, name: str) -> dict:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
+             resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR
          )
          return json_storage.get(name)

-     def json_list(self):
-         ...
+     def json_list(self): ...

-     def json_del(self, name):
-         ...
+     def json_del(self, name): ...

      def delete(self):
          self.fileStorage.delete()
-         json_storage = get_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
-         )
+         json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR)
          json_storage.clean()

@@ -164,29 +144,26 @@ class HandlerStorage:
      This class deals with all handler-related storage requirements, from storing metadata to synchronizing folders
      across instances.
      """
+
      def __init__(self, integration_id: int, root_dir: str = None, is_temporal=False):
          args = {}
          if root_dir is not None:
-             args['root_dir'] = root_dir
-         storageFactory = FileStorageFactory(
-             resource_group=RESOURCE_GROUP.INTEGRATION,
-             sync=False,
-             **args
-         )
+             args["root_dir"] = root_dir
+         storageFactory = FileStorageFactory(resource_group=RESOURCE_GROUP.INTEGRATION, sync=False, **args)
          self.fileStorage = storageFactory(integration_id)
          self.integration_id = integration_id
          self.is_temporal = is_temporal
          # do not sync with remote storage

      def __convert_name(self, name):
-         name = name.lower().replace(' ', '_')
-         return re.sub(r'([^a-z^A-Z^_\d]+)', '_', name)
+         name = name.lower().replace(" ", "_")
+         return re.sub(r"([^a-z^A-Z^_\d]+)", "_", name)

      def is_empty(self):
-         """ check if storage directory is empty
+         """check if storage directory is empty

-             Returns:
-                 bool: true if dir is empty
+         Returns:
+             bool: true if dir is empty
          """
          for path in self.fileStorage.folder_path.iterdir():
              if path.is_file() and path.name in SERVICE_FILES_NAMES:
@@ -221,19 +198,17 @@ class HandlerStorage:
          if not self.is_temporal:
              self.fileStorage.push_path(name)

-     def file_list(self):
-         ...
+     def file_list(self): ...

-     def file_del(self, name):
-         ...
+     def file_del(self, name): ...

      # folder

      def folder_get(self, name):
-         ''' Copies folder from remote to local file system and returns its path
+         """Copies folder from remote to local file system and returns its path

          :param name: name of the folder
-         '''
+         """
          name = self.__convert_name(name)

          self.fileStorage.pull_path(name)
@@ -249,38 +224,28 @@ class HandlerStorage:
      # jsons

      def json_set(self, name, content):
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)
          return json_storage.set(name, content)

      def encrypted_json_set(self, name: str, content: dict) -> None:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
+             resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION
          )
          return json_storage.set(name, content)

      def json_get(self, name):
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)
          return json_storage.get(name)

      def encrypted_json_get(self, name: str) -> dict:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
+             resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION
          )
          return json_storage.get(name)

-     def json_list(self):
-         ...
+     def json_list(self): ...

-     def json_del(self, name):
-         ...
+     def json_del(self, name): ...

      def export_files(self) -> bytes:
          json_storage = self.export_json_storage()
@@ -288,11 +253,11 @@ class HandlerStorage:
          if self.is_empty() and not json_storage:
              return None

-         folder_path = self.folder_get('')
+         folder_path = self.folder_get("")

          zip_fd = io.BytesIO()

-         with zipfile.ZipFile(zip_fd, 'w', zipfile.ZIP_DEFLATED) as zipf:
+         with zipfile.ZipFile(zip_fd, "w", zipfile.ZIP_DEFLATED) as zipf:
              for root, dirs, files in os.walk(folder_path):
                  for file_name in files:
                      if file_name in SERVICE_FILES_NAMES:
@@ -309,14 +274,13 @@ class HandlerStorage:
          return zip_fd.read()

      def import_files(self, content: bytes):
-
-         folder_path = self.folder_get('')
+         folder_path = self.folder_get("")

          zip_fd = io.BytesIO()
          zip_fd.write(content)
          zip_fd.seek(0)

-         with zipfile.ZipFile(zip_fd, 'r') as zip_ref:
+         with zipfile.ZipFile(zip_fd, "r") as zip_ref:
              for name in zip_ref.namelist():
                  # If JSON storage file is in the zip file, import the content to the JSON storage.
                  # Thereafter, remove the file from the folder.
@@ -327,38 +291,36 @@ class HandlerStorage:
                  else:
                      zip_ref.extract(name, folder_path)

-         self.folder_sync('')
+         self.folder_sync("")

      def export_json_storage(self) -> list[dict]:
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)

          records = []
          for record in json_storage.get_all_records():
              record_dict = record.to_dict()
-             if record_dict.get('encrypted_content'):
-                 record_dict['encrypted_content'] = record_dict['encrypted_content'].decode()
+             if record_dict.get("encrypted_content"):
+                 record_dict["encrypted_content"] = record_dict["encrypted_content"].decode()
              records.append(record_dict)

          return records

      def import_json_storage(self, records: bytes) -> None:
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)

          encrypted_json_storage = get_encrypted_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
+             resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION
          )

          records = json.loads(records.decode())

          for record in records:
-             if record['encrypted_content']:
-                 encrypted_json_storage.set_str(record['name'], record['encrypted_content'])
+             if record["encrypted_content"]:
+                 encrypted_json_storage.set_str(record["name"], record["encrypted_content"])
              else:
-                 json_storage.set(record['name'], record['content'])
+                 json_storage.set(record["name"], record["content"])
+
+     def delete(self):
+         self.fileStorage.delete()
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)
+         json_storage.clean()
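
The new `HandlerStorage.delete()` mirrors `ModelStorage.delete()`: it removes the handler's synced files and clears its JSON storage in one call. A hedged usage sketch; the integration id is made up:

```python
# Hypothetical cleanup when an integration is dropped; the id 42 is invented.
from mindsdb.interfaces.storage.model_fs import HandlerStorage

storage = HandlerStorage(integration_id=42)
storage.delete()  # deletes stored files and cleans the JSON storage records
```
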
@@ -47,7 +47,7 @@ def _compile_interval(element, compiler, **kw):
      if items[1].upper().endswith("S"):
          items[1] = items[1][:-1]

-     if compiler.dialect.driver in ["snowflake"] or compiler.dialect.name in ["postgresql"]:
+     if getattr(compiler.dialect, "driver", None) == "snowflake" or compiler.dialect.name == "postgresql":
          # quote all
          args = " ".join(map(str, items))
          args = f"'{args}'"
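
The final hunk replaces the direct `compiler.dialect.driver` lookup with `getattr(..., None)`, so a dialect object without a `driver` attribute no longer raises. A tiny sketch of the defensive form, using a made-up dialect object:

```python
# Illustrative stand-in for a dialect object that only exposes `name`.
class FakeDialect:
    name = "postgresql"


dialect = FakeDialect()

# The old form would raise AttributeError here; the new form falls back to None
# and still matches on the dialect name.
quote_all = getattr(dialect, "driver", None) == "snowflake" or dialect.name == "postgresql"
print(quote_all)  # True
```
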