MindsDB 25.7.1.0__py3-none-any.whl → 25.7.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +54 -95
- mindsdb/api/a2a/agent.py +30 -206
- mindsdb/api/a2a/common/server/server.py +26 -27
- mindsdb/api/a2a/task_manager.py +93 -227
- mindsdb/api/a2a/utils.py +21 -0
- mindsdb/api/executor/command_executor.py +7 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
- mindsdb/api/executor/utilities/sql.py +97 -21
- mindsdb/api/http/namespaces/agents.py +127 -202
- mindsdb/api/http/namespaces/config.py +12 -1
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +11 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +94 -1
- mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +4 -3
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -3
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
- mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
- mindsdb/integrations/libs/keyword_search_base.py +41 -0
- mindsdb/integrations/libs/vectordatabase_handler.py +35 -14
- mindsdb/integrations/utilities/sql_utils.py +11 -0
- mindsdb/interfaces/agents/agents_controller.py +2 -2
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +18 -4
- mindsdb/interfaces/database/projects.py +1 -3
- mindsdb/interfaces/functions/controller.py +54 -64
- mindsdb/interfaces/functions/to_markdown.py +47 -14
- mindsdb/interfaces/knowledge_base/controller.py +134 -35
- mindsdb/interfaces/knowledge_base/evaluate.py +53 -10
- mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
- mindsdb/utilities/config.py +46 -39
- mindsdb/utilities/exception.py +11 -0
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/METADATA +236 -236
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/RECORD +38 -36
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/top_level.txt +0 -0
|
@@ -6,13 +6,14 @@ from duckdb import InvalidInputException
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
|
|
8
8
|
from mindsdb_sql_parser import parse_sql
|
|
9
|
-
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
|
|
10
|
-
from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
11
9
|
from mindsdb_sql_parser.ast import ASTNode, Select, Identifier, Function, Constant
|
|
12
|
-
from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier
|
|
13
10
|
|
|
11
|
+
from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
14
12
|
from mindsdb.utilities import log
|
|
13
|
+
from mindsdb.utilities.exception import format_db_error_message
|
|
14
|
+
from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier
|
|
15
15
|
from mindsdb.utilities.json_encoder import CustomJSONEncoder
|
|
16
|
+
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
|
|
16
17
|
|
|
17
18
|
logger = log.getLogger(__name__)
|
|
18
19
|
|
|
@@ -64,29 +65,85 @@ def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_fun
|
|
|
64
65
|
pandas.columns
|
|
65
66
|
"""
|
|
66
67
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
user_functions
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
68
|
+
try:
|
|
69
|
+
with duckdb.connect(database=":memory:") as con:
|
|
70
|
+
if user_functions:
|
|
71
|
+
user_functions.register(con)
|
|
72
|
+
|
|
73
|
+
for name, value in dataframes.items():
|
|
74
|
+
con.register(name, value)
|
|
75
|
+
|
|
76
|
+
exception = None
|
|
77
|
+
for sample_size in [1000, 10000, 1000000]:
|
|
78
|
+
try:
|
|
79
|
+
con.execute(f"set global pandas_analyze_sample={sample_size};")
|
|
80
|
+
result_df = con.execute(query_str).fetchdf()
|
|
81
|
+
except InvalidInputException as e:
|
|
82
|
+
exception = e
|
|
83
|
+
else:
|
|
84
|
+
break
|
|
81
85
|
else:
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
+
raise exception
|
|
87
|
+
description = con.description
|
|
88
|
+
except Exception as e:
|
|
89
|
+
raise Exception(
|
|
90
|
+
format_db_error_message(db_type="DuckDB", db_error_msg=str(e), failed_query=query_str, is_external=False)
|
|
91
|
+
) from e
|
|
86
92
|
|
|
87
93
|
return result_df, description
|
|
88
94
|
|
|
89
95
|
|
|
96
|
+
_duckdb_functions_and_kw_list = None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_duckdb_functions_and_kw_list() -> list[str] | None:
|
|
100
|
+
"""Returns a list of all functions and keywords supported by DuckDB.
|
|
101
|
+
The list is a merge of:
|
|
102
|
+
- list of duckdb's functions: 'select * from duckdb_functions()' or 'pragma functions'
|
|
103
|
+
- list of keywords, because some functions are just syntactic sugar
|
|
104
|
+
and not present in the duckdb_functions (like 'if()').
|
|
105
|
+
- hardcoded list of window functions, because there is no way to get it from duckdb,
|
|
106
|
+
and they are not present in the duckdb_functions()
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
list[str] | None: List of supported functions and keywords, or None if unable to retrieve the list.
|
|
110
|
+
"""
|
|
111
|
+
global _duckdb_functions_and_kw_list
|
|
112
|
+
window_functions_list = [
|
|
113
|
+
"cume_dist",
|
|
114
|
+
"dense_rank",
|
|
115
|
+
"first_value",
|
|
116
|
+
"lag",
|
|
117
|
+
"last_value",
|
|
118
|
+
"lead",
|
|
119
|
+
"nth_value",
|
|
120
|
+
"ntile",
|
|
121
|
+
"percent_rank",
|
|
122
|
+
"rank_dense",
|
|
123
|
+
"rank",
|
|
124
|
+
"row_number",
|
|
125
|
+
]
|
|
126
|
+
if _duckdb_functions_and_kw_list is None:
|
|
127
|
+
try:
|
|
128
|
+
df, _ = query_df_with_type_infer_fallback(
|
|
129
|
+
"""
|
|
130
|
+
select distinct name
|
|
131
|
+
from (
|
|
132
|
+
select function_name as name from duckdb_functions()
|
|
133
|
+
union all
|
|
134
|
+
select keyword_name as name from duckdb_keywords()
|
|
135
|
+
) ta;
|
|
136
|
+
""",
|
|
137
|
+
dataframes={},
|
|
138
|
+
)
|
|
139
|
+
df.columns = [name.lower() for name in df.columns]
|
|
140
|
+
_duckdb_functions_and_kw_list = df["name"].drop_duplicates().str.lower().to_list() + window_functions_list
|
|
141
|
+
except Exception as e:
|
|
142
|
+
logger.warning(f"Unable to get DuckDB functions list: {e}")
|
|
143
|
+
|
|
144
|
+
return _duckdb_functions_and_kw_list
|
|
145
|
+
|
|
146
|
+
|
|
90
147
|
def query_df(df, query, session=None):
|
|
91
148
|
"""Perform simple query ('select' from one table, without subqueries and joins) on DataFrame.
|
|
92
149
|
|
|
@@ -100,8 +157,10 @@ def query_df(df, query, session=None):
|
|
|
100
157
|
|
|
101
158
|
if isinstance(query, str):
|
|
102
159
|
query_ast = parse_sql(query)
|
|
160
|
+
query_str = query
|
|
103
161
|
else:
|
|
104
162
|
query_ast = copy.deepcopy(query)
|
|
163
|
+
query_str = str(query)
|
|
105
164
|
|
|
106
165
|
if isinstance(query_ast, Select) is False or isinstance(query_ast.from_table, Identifier) is False:
|
|
107
166
|
raise Exception("Only 'SELECT from TABLE' statements supported for internal query")
|
|
@@ -125,6 +184,7 @@ def query_df(df, query, session=None):
|
|
|
125
184
|
return node
|
|
126
185
|
if isinstance(node, Function):
|
|
127
186
|
fnc_name = node.op.lower()
|
|
187
|
+
|
|
128
188
|
if fnc_name == "database" and len(node.args) == 0:
|
|
129
189
|
if session is not None:
|
|
130
190
|
cur_db = session.database
|
|
@@ -142,6 +202,22 @@ def query_df(df, query, session=None):
|
|
|
142
202
|
if user_functions is not None:
|
|
143
203
|
user_functions.check_function(node)
|
|
144
204
|
|
|
205
|
+
duckdb_functions_and_kw_list = get_duckdb_functions_and_kw_list() or []
|
|
206
|
+
custom_functions_list = [] if user_functions is None else list(user_functions.functions.keys())
|
|
207
|
+
all_functions_list = duckdb_functions_and_kw_list + custom_functions_list
|
|
208
|
+
if len(all_functions_list) > 0 and fnc_name not in all_functions_list:
|
|
209
|
+
raise Exception(
|
|
210
|
+
format_db_error_message(
|
|
211
|
+
db_type="DuckDB",
|
|
212
|
+
db_error_msg=(
|
|
213
|
+
f"Unknown function: '{fnc_name}'. This function is not recognized during internal query processing.\n"
|
|
214
|
+
"Please use DuckDB-supported functions instead."
|
|
215
|
+
),
|
|
216
|
+
failed_query=query_str,
|
|
217
|
+
is_external=False,
|
|
218
|
+
)
|
|
219
|
+
)
|
|
220
|
+
|
|
145
221
|
query_traversal(query_ast, adapt_query)
|
|
146
222
|
|
|
147
223
|
# convert json columns
|