MindsDB 25.7.2.0__py3-none-any.whl → 25.7.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB has been flagged as potentially problematic; review the changes below and consult the registry's advisory page for details before upgrading.

Files changed (69):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +1 -1
  3. mindsdb/api/a2a/common/server/server.py +16 -6
  4. mindsdb/api/executor/command_executor.py +213 -137
  5. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
  6. mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
  7. mindsdb/api/executor/planner/plan_join.py +3 -0
  8. mindsdb/api/executor/planner/plan_join_ts.py +117 -100
  9. mindsdb/api/executor/planner/query_planner.py +1 -0
  10. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
  11. mindsdb/api/http/initialize.py +16 -43
  12. mindsdb/api/http/namespaces/agents.py +24 -21
  13. mindsdb/api/http/namespaces/chatbots.py +83 -120
  14. mindsdb/api/http/namespaces/file.py +1 -1
  15. mindsdb/api/http/namespaces/jobs.py +38 -60
  16. mindsdb/api/http/namespaces/tree.py +69 -61
  17. mindsdb/api/mcp/start.py +2 -0
  18. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
  19. mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
  20. mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
  21. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
  22. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
  23. mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
  24. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
  25. mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
  26. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -76
  27. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  28. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +16 -3
  29. mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
  30. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  31. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
  32. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
  33. mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
  34. mindsdb/integrations/handlers/salesforce_handler/constants.py +208 -0
  35. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +142 -81
  36. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -4
  37. mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
  38. mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
  39. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
  40. mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
  41. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
  42. mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
  43. mindsdb/integrations/utilities/handler_utils.py +32 -12
  44. mindsdb/interfaces/agents/agents_controller.py +169 -110
  45. mindsdb/interfaces/agents/langchain_agent.py +10 -3
  46. mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -8
  47. mindsdb/interfaces/database/database.py +38 -13
  48. mindsdb/interfaces/database/integrations.py +20 -5
  49. mindsdb/interfaces/database/projects.py +63 -16
  50. mindsdb/interfaces/database/views.py +86 -60
  51. mindsdb/interfaces/jobs/jobs_controller.py +103 -110
  52. mindsdb/interfaces/knowledge_base/controller.py +33 -5
  53. mindsdb/interfaces/knowledge_base/evaluate.py +53 -9
  54. mindsdb/interfaces/knowledge_base/executor.py +24 -0
  55. mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
  56. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
  57. mindsdb/interfaces/query_context/context_controller.py +100 -133
  58. mindsdb/interfaces/skills/skills_controller.py +18 -6
  59. mindsdb/interfaces/storage/db.py +40 -6
  60. mindsdb/interfaces/variables/variables_controller.py +8 -15
  61. mindsdb/utilities/config.py +3 -3
  62. mindsdb/utilities/functions.py +72 -60
  63. mindsdb/utilities/log.py +38 -6
  64. mindsdb/utilities/ps.py +7 -7
  65. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/METADATA +262 -263
  66. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/RECORD +69 -68
  67. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/WHEEL +0 -0
  68. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/licenses/LICENSE +0 -0
  69. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,19 @@ from urllib.parse import urlparse
5
5
 
6
6
  import pandas as pd
7
7
  import psycopg
8
- from mindsdb_sql_parser.ast import Parameter, Identifier, Update, BinaryOperation
8
+ from mindsdb_sql_parser.ast import (
9
+ Parameter,
10
+ Identifier,
11
+ BinaryOperation,
12
+ Tuple as AstTuple,
13
+ Constant,
14
+ Select,
15
+ OrderBy,
16
+ TypeCast,
17
+ Delete,
18
+ Update,
19
+ Function,
20
+ )
9
21
  from pgvector.psycopg import register_vector
10
22
 
11
23
  from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -17,6 +29,7 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
17
29
  VectorStoreHandler,
18
30
  DistanceFunction,
19
31
  TableField,
32
+ FilterOperator,
20
33
  )
21
34
  from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase
22
35
  from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs
@@ -169,31 +182,42 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
169
182
  embedding_condition = None
170
183
 
171
184
  for condition in conditions:
185
+ is_embedding = condition.column == "embeddings"
186
+
172
187
  parts = condition.column.split(".")
173
- key = parts[0]
188
+ key = Identifier(parts[0])
189
+
174
190
  # converts 'col.el1.el2' to col->'el1'->>'el2'
175
191
  if len(parts) > 1:
176
192
  # intermediate elements
177
193
  for el in parts[1:-1]:
178
- key += f" -> '{el}'"
194
+ key = BinaryOperation(op="->", args=[key, Constant(el)])
179
195
 
180
196
  # last element
181
- key += f" ->> '{parts[-1]}'"
197
+ key = BinaryOperation(op="->>", args=[key, Constant(parts[-1])])
182
198
 
183
199
  type_cast = None
184
- if isinstance(condition.value, int):
200
+ value = condition.value
201
+ if (
202
+ isinstance(value, list)
203
+ and len(value) > 0
204
+ and condition.op in (FilterOperator.IN, FilterOperator.NOT_IN)
205
+ ):
206
+ value = condition.value[0]
207
+
208
+ if isinstance(value, int):
185
209
  type_cast = "int"
186
- elif isinstance(condition.value, float):
210
+ elif isinstance(value, float):
187
211
  type_cast = "float"
188
212
  if type_cast is not None:
189
- key = f"({key})::{type_cast}"
213
+ key = TypeCast(type_cast, key)
190
214
 
191
215
  item = {
192
216
  "name": key,
193
217
  "op": condition.op.value,
194
218
  "value": condition.value,
195
219
  }
196
- if key == "embeddings":
220
+ if is_embedding:
197
221
  embedding_condition = item
198
222
  else:
199
223
  filter_conditions.append(item)
@@ -205,64 +229,24 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
205
229
  """
206
230
  Construct where clauses from filter conditions
207
231
  """
208
- if filter_conditions is None:
209
- return ""
210
232
 
211
- where_clauses = []
233
+ where_clause = None
212
234
 
213
235
  for item in filter_conditions:
214
236
  key = item["name"]
215
237
 
216
238
  if item["op"].lower() in ("in", "not in"):
217
- values = list(repr(i) for i in item["value"])
218
- item["value"] = "({})".format(", ".join(values))
239
+ values = [Constant(i) for i in item["value"]]
240
+ value = AstTuple(values)
219
241
  else:
220
- if item["value"] is None:
221
- item["value"] = "null"
222
- else:
223
- item["value"] = repr(item["value"])
224
- where_clauses.append(f"{key} {item['op']} {item['value']}")
225
-
226
- if len(where_clauses) > 1:
227
- return f"WHERE {' AND '.join(where_clauses)}"
228
- elif len(where_clauses) == 1:
229
- return f"WHERE {where_clauses[0]}"
230
- else:
231
- return ""
232
-
233
- @staticmethod
234
- def _construct_where_clause_with_keywords(filter_conditions=None, keyword_query=None, content_column_name=None):
235
- if not keyword_query or not content_column_name:
236
- return PgVectorHandler._construct_where_clause(filter_conditions)
237
-
238
- keyword_query_condition = (
239
- f"""to_tsvector('english', {content_column_name}) @@ websearch_to_tsquery('english', '{keyword_query}')"""
240
- )
241
- if filter_conditions is None:
242
- return ""
243
-
244
- where_clauses = []
245
-
246
- for item in filter_conditions:
247
- key = item["name"]
242
+ value = Constant(item["value"])
243
+ condition = BinaryOperation(op=item["op"], args=[key, value])
248
244
 
249
- if item["op"].lower() in ("in", "not in"):
250
- values = list(repr(i) for i in item["value"])
251
- item["value"] = "({})".format(", ".join(values))
245
+ if where_clause is None:
246
+ where_clause = condition
252
247
  else:
253
- if item["value"] is None:
254
- item["value"] = "null"
255
- else:
256
- item["value"] = repr(item["value"])
257
- where_clauses.append(f"{key} {item['op']} {item['value']}")
258
-
259
- where_clauses.append(keyword_query_condition)
260
- if len(where_clauses) > 1:
261
- return f"WHERE {' AND '.join(where_clauses)}"
262
- elif len(where_clauses) == 1:
263
- return f"WHERE {where_clauses[0]}"
264
- else:
265
- return ""
248
+ where_clause = BinaryOperation(op="AND", args=[where_clause, condition])
249
+ return where_clause
266
250
 
267
251
  @staticmethod
268
252
  def _construct_full_after_from_clause(
@@ -275,9 +259,8 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
275
259
  def _build_keyword_bm25_query(
276
260
  self,
277
261
  table_name: str,
278
- query: str,
262
+ keyword_search_args: KeywordSearchArgs,
279
263
  columns: List[str] = None,
280
- content_column_name: str = "content",
281
264
  conditions: List[FilterCondition] = None,
282
265
  limit: int = None,
283
266
  offset: int = None,
@@ -286,21 +269,44 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
286
269
  columns = ["id", "content", "metadata"]
287
270
 
288
271
  filter_conditions, _ = self._translate_conditions(conditions)
272
+ where_clause = self._construct_where_clause(filter_conditions)
289
273
 
290
- # given filter conditions, construct where clause
291
- where_clause = self._construct_where_clause_with_keywords(filter_conditions, query, content_column_name)
292
-
293
- query = f"""
294
- SELECT
295
- {", ".join(columns)},
296
- ts_rank_cd(to_tsvector('english', {content_column_name}), websearch_to_tsquery('english', '{query}')) as distance
297
- FROM
298
- {table_name}
299
- {where_clause if where_clause else ""}
300
- {f"LIMIT {limit}" if limit else ""}
301
- {f"OFFSET {offset}" if offset else ""};"""
274
+ if keyword_search_args:
275
+ keyword_query_condition = BinaryOperation(
276
+ op="@@",
277
+ args=[
278
+ Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]),
279
+ Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]),
280
+ ],
281
+ )
302
282
 
303
- return query
283
+ if where_clause:
284
+ where_clause = BinaryOperation(op="AND", args=[where_clause, keyword_query_condition])
285
+ else:
286
+ where_clause = keyword_query_condition
287
+
288
+ distance = Function(
289
+ "ts_rank_cd",
290
+ args=[
291
+ Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]),
292
+ Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]),
293
+ ],
294
+ alias=Identifier("distance"),
295
+ )
296
+
297
+ targets = [Identifier(col) for col in columns]
298
+ targets.append(distance)
299
+
300
+ limit_clause = Constant(limit) if limit else None
301
+ offset_clause = Constant(offset) if offset else None
302
+
303
+ return Select(
304
+ targets=targets,
305
+ from_table=Identifier(table_name),
306
+ where=where_clause,
307
+ limit=limit_clause,
308
+ offset=offset_clause,
309
+ )
304
310
 
305
311
  def _build_select_query(
306
312
  self,
@@ -309,12 +315,12 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
309
315
  conditions: List[FilterCondition] = None,
310
316
  limit: int = None,
311
317
  offset: int = None,
312
- ) -> str:
318
+ ) -> Select:
313
319
  """
314
320
  given inputs, build string query
315
321
  """
316
- limit_clause = f"LIMIT {limit}" if limit else ""
317
- offset_clause = f"OFFSET {offset}" if offset else ""
322
+ limit_clause = Constant(limit) if limit else None
323
+ offset_clause = Constant(offset) if offset else None
318
324
 
319
325
  # translate filter conditions to dictionary
320
326
  filter_conditions, embedding_search = self._translate_conditions(conditions)
@@ -335,7 +341,15 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
335
341
  modified_columns = ["id", "content", "embeddings", "metadata"]
336
342
  has_distance = True
337
343
 
338
- targets = ", ".join(modified_columns)
344
+ targets = [Identifier(col) for col in modified_columns]
345
+
346
+ query = Select(
347
+ targets=targets,
348
+ from_table=Identifier(table_name),
349
+ where=where_clause,
350
+ limit=limit_clause,
351
+ offset=offset_clause,
352
+ )
339
353
 
340
354
  if embedding_search:
341
355
  search_vector = embedding_search["value"]
@@ -352,15 +366,18 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
352
366
  if isinstance(search_vector, list):
353
367
  search_vector = f"[{','.join(str(x) for x in search_vector)}]"
354
368
 
369
+ vector_op = BinaryOperation(
370
+ op=self.distance_op,
371
+ args=[Identifier("embeddings"), Constant(search_vector)],
372
+ alias=Identifier("distance"),
373
+ )
355
374
  # Calculate distance as part of the query if needed
356
375
  if has_distance:
357
- targets = f"{targets}, (embeddings {self.distance_op} '{search_vector}') as distance"
376
+ query.targets.append(vector_op)
358
377
 
359
- return f"SELECT {targets} FROM {table_name} {where_clause} ORDER BY embeddings {self.distance_op} '{search_vector}' ASC {limit_clause} {offset_clause} "
378
+ query.order_by = [OrderBy(vector_op, direction="ASC")]
360
379
 
361
- else:
362
- # if filter conditions, return rows that satisfy the conditions
363
- return f"SELECT {targets} FROM {table_name} {where_clause} {limit_clause} {offset_clause}"
380
+ return query
364
381
 
365
382
  def _check_table(self, table_name: str):
366
383
  # Apply namespace for a user
@@ -386,8 +403,8 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
386
403
  columns = ["id", "content", "embeddings", "metadata"]
387
404
 
388
405
  query = self._build_select_query(table_name, columns, conditions, limit, offset)
389
-
390
- result = self.raw_query(query)
406
+ query_str = self.renderer.get_string(query, with_failback=True)
407
+ result = self.raw_query(query_str)
391
408
 
392
409
  # ensure embeddings are returned as string so they can be parsed by mindsdb
393
410
  if "embeddings" in columns:
@@ -408,12 +425,10 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
408
425
 
409
426
  if columns is None:
410
427
  columns = ["id", "content", "embeddings", "metadata"]
411
- content_column_name = keyword_search_args.column
412
- query = self._build_keyword_bm25_query(
413
- table_name, keyword_search_args.query, columns, content_column_name, conditions, limit, offset
414
- )
415
428
 
416
- result = self.raw_query(query)
429
+ query = self._build_keyword_bm25_query(table_name, keyword_search_args, columns, conditions, limit, offset)
430
+ query_str = self.renderer.get_string(query, with_failback=True)
431
+ result = self.raw_query(query_str)
417
432
 
418
433
  # ensure embeddings are returned as string so they can be parsed by mindsdb
419
434
  if "embeddings" in columns:
@@ -622,8 +637,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
622
637
  filter_conditions, _ = self._translate_conditions(conditions)
623
638
  where_clause = self._construct_where_clause(filter_conditions)
624
639
 
625
- query = f"DELETE FROM {table_name} {where_clause}"
626
- self.raw_query(query)
640
+ query = Delete(table=Identifier(table_name), where=where_clause)
641
+ query_str = self.renderer.get_string(query, with_failback=True)
642
+ self.raw_query(query_str)
627
643
 
628
644
  def drop_table(self, table_name: str, if_exists=True):
629
645
  """
@@ -1,5 +1,3 @@
1
- import csv
2
- import io
3
1
  import time
4
2
  import json
5
3
  from typing import Optional, Any
@@ -625,7 +623,7 @@ class PostgresHandler(MetaDatabaseHandler):
625
623
  result = self.native_query(query)
626
624
  return result
627
625
 
628
- def meta_get_column_statistics(self, table_names: Optional[list] = None) -> dict:
626
+ def meta_get_column_statistics(self, table_names: Optional[list] = None) -> Response:
629
627
  """
630
628
  Retrieves column statistics (e.g., most common values, frequencies, null percentage, and distinct value count)
631
629
  for the specified tables or all tables if no list is provided.
@@ -634,54 +632,58 @@ class PostgresHandler(MetaDatabaseHandler):
634
632
  table_names (list): A list of table names for which to retrieve column statistics.
635
633
 
636
634
  Returns:
637
- dict: A dictionary containing the column statistics.
635
+ Response: A response object containing the column statistics.
638
636
  """
639
- query = """
637
+ table_filter = ""
638
+ if table_names is not None and len(table_names) > 0:
639
+ quoted_names = [f"'{t}'" for t in table_names]
640
+ table_filter = f" AND ps.tablename IN ({','.join(quoted_names)})"
641
+
642
+ query = (
643
+ """
640
644
  SELECT
641
- ps.attname AS column_name,
642
- ps.tablename AS table_name,
643
- ps.most_common_vals AS most_common_values,
644
- ps.most_common_freqs::text AS most_common_frequencies,
645
- ps.null_frac * 100 AS null_percentage,
646
- ps.n_distinct AS distinct_values_count,
647
- ps.histogram_bounds AS histogram_bounds
645
+ ps.tablename AS TABLE_NAME,
646
+ ps.attname AS COLUMN_NAME,
647
+ ROUND(ps.null_frac::numeric * 100, 2) AS NULL_PERCENTAGE,
648
+ CASE
649
+ WHEN ps.n_distinct < 0 THEN NULL
650
+ ELSE ps.n_distinct::bigint
651
+ END AS DISTINCT_VALUES_COUNT,
652
+ ps.most_common_vals AS MOST_COMMON_VALUES,
653
+ ps.most_common_freqs AS MOST_COMMON_FREQUENCIES,
654
+ ps.histogram_bounds
648
655
  FROM pg_stats ps
649
656
  WHERE ps.schemaname = current_schema()
650
657
  AND ps.tablename NOT LIKE 'pg_%'
651
658
  AND ps.tablename NOT LIKE 'sql_%'
652
659
  """
653
-
654
- if table_names is not None and len(table_names) > 0:
655
- table_names = [f"'{t}'" for t in table_names]
656
- query += f" AND ps.tablename IN ({','.join(table_names)})"
660
+ + table_filter
661
+ + """
662
+ ORDER BY ps.tablename, ps.attname
663
+ """
664
+ )
657
665
 
658
666
  result = self.native_query(query)
659
- df = result.data_frame
660
667
 
661
- def parse_pg_array_string(x):
662
- try:
663
- return (
664
- [item.strip(" ,") for row in csv.reader(io.StringIO(x.strip("{}"))) for item in row if item.strip()]
665
- if x
666
- else []
667
- )
668
- except IndexError:
669
- logger.error(f"Error parsing PostgreSQL array string: {x}")
670
- return []
671
-
672
- # Convert most_common_values and most_common_frequencies from string representation to lists.
673
- df["most_common_values"] = df["most_common_values"].apply(lambda x: parse_pg_array_string(x))
674
- df["most_common_frequencies"] = df["most_common_frequencies"].apply(lambda x: parse_pg_array_string(x))
675
-
676
- # Get the minimum and maximum values from the histogram bounds.
677
- df["minimum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[0] if x else None)
678
- df["maximum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[-1] if x else None)
679
-
680
- # Handle cases where distinct_values_count is negative (indicating an approximation).
681
- df["distinct_values_count"] = df["distinct_values_count"].apply(lambda x: x if x >= 0 else None)
668
+ if result.type == RESPONSE_TYPE.TABLE and result.data_frame is not None:
669
+ df = result.data_frame
682
670
 
683
- result.data_frame = df.drop(columns=["histogram_bounds"])
671
+ # Extract min/max from histogram bounds
672
+ def extract_min_max(histogram_str):
673
+ if histogram_str and str(histogram_str) != "nan":
674
+ clean = str(histogram_str).strip("{}")
675
+ if clean:
676
+ values = clean.split(",")
677
+ min_val = values[0].strip(" \"'") if values else None
678
+ max_val = values[-1].strip(" \"'") if values else None
679
+ return min_val, max_val
680
+ return None, None
684
681
 
682
+ min_max_values = df["histogram_bounds"].apply(extract_min_max)
683
+ df["MINIMUM_VALUE"] = min_max_values.apply(lambda x: x[0])
684
+ df["MAXIMUM_VALUE"] = min_max_values.apply(lambda x: x[1])
685
+
686
+ result.data_frame = df.drop(columns=["histogram_bounds"])
685
687
  return result
686
688
 
687
689
  def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response: