MindsDB: mindsdb-25.6.2.0-py3-none-any.whl → mindsdb-25.6.3.0-py3-none-any.whl
This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +25 -4
- mindsdb/api/a2a/task_manager.py +68 -6
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
- mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +3 -0
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +94 -8
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +19 -1
- mindsdb/integrations/libs/api_handler.py +19 -1
- mindsdb/integrations/libs/base.py +86 -2
- mindsdb/interfaces/agents/agents_controller.py +32 -6
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -6
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +4 -0
- mindsdb/interfaces/database/integrations.py +4 -2
- mindsdb/interfaces/knowledge_base/controller.py +3 -15
- mindsdb/interfaces/knowledge_base/evaluate.py +0 -3
- mindsdb/interfaces/skills/skills_controller.py +0 -23
- mindsdb/interfaces/skills/sql_agent.py +8 -4
- mindsdb/interfaces/storage/db.py +20 -4
- mindsdb/utilities/config.py +5 -1
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +250 -250
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +30 -30
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py

@@ -1,26 +1,28 @@
-from …
+from google.cloud.bigquery import Client, QueryJobConfig
 from google.api_core.exceptions import BadRequest
+import pandas as pd
 from sqlalchemy_bigquery.base import BigQueryDialect
-from …
+from typing import Any, Dict, Optional, Text
 
 from mindsdb.utilities import log
 from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.integrations.libs.base import DatabaseHandler
+from mindsdb.integrations.libs.base import MetaDatabaseHandler
 from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
 from mindsdb.integrations.utilities.handlers.auth_utilities.google import GoogleServiceAccountOAuth2Manager
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
     HandlerResponse as Response,
-    RESPONSE_TYPE
+    RESPONSE_TYPE,
 )
 
 logger = log.getLogger(__name__)
 
 
-class BigQueryHandler(DatabaseHandler):
+class BigQueryHandler(MetaDatabaseHandler):
     """
     This handler handles connection and execution of Google BigQuery statements.
     """
+
     name = "bigquery"
 
     def __init__(self, name: Text, connection_data: Dict, **kwargs: Any):
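The base-class swap above is the headline change in this file: `MetaDatabaseHandler` is introduced in this same release (see `mindsdb/integrations/libs/base.py +86 -2` in the file list). Its definition is not part of this diff; judging only from the methods this handler implements further down, the contract looks roughly like the following sketch — the class name `MetadataInterfaceSketch` is invented, and nothing here is taken from `libs/base.py` itself:

    # Rough sketch of the metadata interface implied by this diff -- NOT the
    # actual MetaDatabaseHandler definition from mindsdb/integrations/libs/base.py.
    from typing import Optional

    class MetadataInterfaceSketch:
        def meta_get_tables(self, table_names: Optional[list] = None): ...
        def meta_get_columns(self, table_names: Optional[list] = None): ...
        def meta_get_column_statistics_for_table(self, table_name: str, columns: list): ...
        def meta_get_primary_keys(self, table_names: Optional[list] = None): ...
        def meta_get_foreign_keys(self, table_names: Optional[list] = None): ...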
@@ -49,19 +51,16 @@ class BigQueryHandler(DatabaseHandler):
             return self.connection
 
         # Mandatory connection parameters
-        if not all(key in self.connection_data for key in ['project_id', 'dataset']):
-            raise ValueError('Required parameters (project_id, dataset) must be provided.')
+        if not all(key in self.connection_data for key in ["project_id", "dataset"]):
+            raise ValueError("Required parameters (project_id, dataset) must be provided.")
 
         google_sa_oauth2_manager = GoogleServiceAccountOAuth2Manager(
-            credentials_file=self.connection_data.get('service_account_keys'),
-            credentials_json=self.connection_data.get('service_account_json'),
+            credentials_file=self.connection_data.get("service_account_keys"),
+            credentials_json=self.connection_data.get("service_account_json"),
         )
         credentials = google_sa_oauth2_manager.get_oauth2_credentials()
 
-        client = Client(
-            project=self.connection_data["project_id"],
-            credentials=credentials
-        )
+        client = Client(project=self.connection_data["project_id"], credentials=credentials)
         self.is_connected = True
         self.connection = client
         return self.connection
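For context, the validation above means `connect()` now fails fast unless both required keys are present. A hypothetical `connection_data` dict satisfying it — all values are placeholders, and only one of the two credential keys is needed:

    # Placeholder values; keys match what connect() reads above. The shape of
    # service_account_json (string vs. dict) is an assumption, not shown in the diff.
    connection_data = {
        "project_id": "my-gcp-project",                           # required
        "dataset": "my_dataset",                                  # required
        "service_account_keys": "/path/to/service_account.json",  # path to key file, or...
        "service_account_json": '{"type": "service_account", ...}',  # ...inline credentials
    }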
@@ -86,14 +85,14 @@ class BigQueryHandler(DatabaseHandler):
 
         try:
             connection = self.connect()
-            connection.query('SELECT 1;')
+            connection.query("SELECT 1;")
 
             # Check if the dataset exists
-            connection.get_dataset(self.connection_data['dataset'])
+            connection.get_dataset(self.connection_data["dataset"])
 
             response.success = True
         except (BadRequest, ValueError) as e:
-            logger.error(f'Error connecting to BigQuery {self.connection_data["project_id"]}, {e}!')
+            logger.error(f"Error connecting to BigQuery {self.connection_data['project_id']}, {e}!")
             response.error_message = e
 
         if response.success is False and self.is_connected is True:
@@ -113,22 +112,18 @@ class BigQueryHandler(DatabaseHandler):
         """
         connection = self.connect()
         try:
-            job_config = QueryJobConfig(default_dataset=f"{self.connection_data['project_id']}.{self.connection_data['dataset']}")
+            job_config = QueryJobConfig(
+                default_dataset=f"{self.connection_data['project_id']}.{self.connection_data['dataset']}"
+            )
             query = connection.query(query, job_config=job_config)
             result = query.to_dataframe()
             if not result.empty:
-                response = Response(
-                    RESPONSE_TYPE.TABLE,
-                    result
-                )
+                response = Response(RESPONSE_TYPE.TABLE, result)
             else:
                 response = Response(RESPONSE_TYPE.OK)
         except Exception as e:
-            logger.error(f'Error running query: {query} on {self.connection_data["project_id"]}!')
-            response = Response(
-                RESPONSE_TYPE.ERROR,
-                error_message=str(e)
-            )
+            logger.error(f"Error running query: {query} on {self.connection_data['project_id']}!")
+            response = Response(RESPONSE_TYPE.ERROR, error_message=str(e))
         return response
 
     def query(self, query: ASTNode) -> Response:
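The `default_dataset` job option set above is what lets user queries reference tables without a `project.dataset` prefix. A minimal standalone illustration of the same google-cloud-bigquery mechanism — project and dataset names are placeholders, and valid credentials are required to actually run it:

    from google.cloud.bigquery import Client, QueryJobConfig

    client = Client(project="my-gcp-project")  # placeholder project

    # With default_dataset set, the unqualified name "my_table" resolves to
    # `my-gcp-project.my_dataset.my_table`.
    job_config = QueryJobConfig(default_dataset="my-gcp-project.my_dataset")
    job = client.query("SELECT COUNT(*) AS n FROM my_table", job_config=job_config)
    print(job.to_dataframe())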
@@ -154,7 +149,7 @@ class BigQueryHandler(DatabaseHandler):
         """
         query = f"""
            SELECT table_name, table_schema, table_type
-           FROM `{self.connection_data['project_id']}.{self.connection_data['dataset']}.INFORMATION_SCHEMA.TABLES`
+           FROM `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES`
            WHERE table_type IN ('BASE TABLE', 'VIEW')
         """
         result = self.native_query(query)
@@ -174,8 +169,204 @@ class BigQueryHandler(DatabaseHandler):
         """
         query = f"""
            SELECT column_name AS Field, data_type as Type
-           FROM `{self.connection_data['project_id']}.{self.connection_data['dataset']}.INFORMATION_SCHEMA.COLUMNS`
+           FROM `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS`
            WHERE table_name = '{table_name}'
         """
         result = self.native_query(query)
         return result
+
+    def meta_get_tables(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves table metadata for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve metadata information.
+
+        Returns:
+            Response: A response object containing the metadata information, formatted as per the `Response` class.
+        """
+        query = f"""
+            SELECT
+                t.table_name,
+                t.table_schema,
+                t.table_type,
+                st.row_count
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` AS t
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.__TABLES__` AS st
+            ON
+                t.table_name = st.table_id
+            WHERE
+                t.table_type IN ('BASE TABLE', 'VIEW')
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND t.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_columns(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves column metadata for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve column metadata.
+
+        Returns:
+            Response: A response object containing the column metadata.
+        """
+        query = f"""
+            SELECT
+                table_name,
+                column_name,
+                data_type,
+                column_default,
+                CASE is_nullable
+                    WHEN 'YES' THEN TRUE
+                    ELSE FALSE
+                END AS is_nullable
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS`
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" WHERE table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_column_statistics_for_table(self, table_name: str, columns: list) -> Response:
+        """
+        Retrieves statistics for the specified columns in a table.
+
+        Args:
+            table_name (str): The name of the table.
+            columns (list): A list of column names to retrieve statistics for.
+
+        Returns:
+            Response: A response object containing the column statistics.
+        """
+        # To avoid hitting BigQuery's query size limits, we will chunk the columns into batches.
+        # This is because the queries are combined using UNION ALL, which can lead to very large queries if there are many columns.
+        BATCH_SIZE = 20
+
+        def chunked(lst, n):
+            """
+            Yields successive n-sized chunks from lst.
+            """
+            for i in range(0, len(lst), n):
+                yield lst[i : i + n]
+
+        queries = []
+        for column_batch in chunked(columns, BATCH_SIZE):
+            batch_queries = []
+            for column in column_batch:
+                batch_queries.append(
+                    f"""
+                    SELECT
+                        '{table_name}' AS table_name,
+                        '{column}' AS column_name,
+                        SAFE_DIVIDE(COUNTIF({column} IS NULL), COUNT(*)) * 100 AS null_percentage,
+                        CAST(MIN(`{column}`) AS STRING) AS minimum_value,
+                        CAST(MAX(`{column}`) AS STRING) AS maximum_value,
+                        COUNT(DISTINCT {column}) AS distinct_values_count
+                    FROM
+                        `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.{table_name}`
+                    """
+                )
+
+            query = " UNION ALL ".join(batch_queries)
+            queries.append(query)
+
+        results = []
+        for query in queries:
+            try:
+                result = self.native_query(query)
+                if result.resp_type == RESPONSE_TYPE.TABLE:
+                    results.append(result.data_frame)
+                else:
+                    logger.error(f"Error retrieving column statistics for table {table_name}: {result.error_message}")
+            except Exception as e:
+                logger.error(f"Exception occurred while retrieving column statistics for table {table_name}: {e}")
+
+        if not results:
+            logger.warning(f"No column statistics could be retrieved for table {table_name}.")
+            return Response(
+                RESPONSE_TYPE.ERROR, error_message=f"No column statistics could be retrieved for table {table_name}."
+            )
+        return Response(RESPONSE_TYPE.TABLE, pd.concat(results, ignore_index=True) if results else pd.DataFrame())
+
+    def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves primary key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve primary key information.
+
+        Returns:
+            Response: A response object containing the primary key information.
+        """
+        query = f"""
+            SELECT
+                tc.table_name,
+                kcu.column_name,
+                kcu.ordinal_position,
+                tc.constraint_name,
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu
+            ON
+                tc.constraint_name = kcu.constraint_name
+            WHERE
+                tc.constraint_type = 'PRIMARY KEY'
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND tc.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_foreign_keys(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves foreign key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve foreign key information.
+
+        Returns:
+            Response: A response object containing the foreign key information.
+        """
+        query = f"""
+            SELECT
+                ccu.table_name AS parent_table_name,
+                ccu.column_name AS parent_column_name,
+                kcu.table_name AS child_table_name,
+                kcu.column_name AS child_column_name,
+                tc.constraint_name
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu
+            ON
+                tc.constraint_name = kcu.constraint_name
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE` AS ccu
+            ON
+                tc.constraint_name = ccu.constraint_name
+            WHERE
+                tc.constraint_type = 'FOREIGN KEY'
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND tc.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
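Together, the five new `meta_get_*` methods give the data catalog (see `data_catalog_loader.py` and `data_catalog_reader.py` in the file list) one surface for schema discovery, per-table row counts, column statistics, and key constraints; batching statistics queries in groups of 20 columns bounds the size of each UNION ALL statement. A hypothetical direct-call sketch — the datasource name, table names, and connection values are invented for illustration, and real credentials would be needed:

    # Hypothetical usage of the new metadata API; "orders"/"customers" and the
    # connection values are placeholders, not taken from this diff.
    handler = BigQueryHandler(
        "bigquery_datasource",
        connection_data={"project_id": "my-gcp-project", "dataset": "my_dataset"},
    )

    tables = handler.meta_get_tables(["orders", "customers"])   # names, types, row counts
    columns = handler.meta_get_columns(["orders"])              # types, defaults, nullability
    stats = handler.meta_get_column_statistics_for_table("orders", ["id", "total"])
    pks = handler.meta_get_primary_keys(["orders"])
    fks = handler.meta_get_foreign_keys(["orders"])

    for resp in (tables, columns, stats, pks, fks):
        if resp.resp_type == RESPONSE_TYPE.TABLE:
            print(resp.data_frame)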
mindsdb/integrations/handlers/mysql_handler/mysql_handler.py

@@ -179,6 +179,9 @@ class MySQLHandler(DatabaseHandler):
                 config["ssl_cert"] = ssl_cert
             if ssl_key is not None:
                 config["ssl_key"] = ssl_key
+        elif ssl is False:
+            config["ssl_disabled"] = True
+
         if "collation" not in config:
             config["collation"] = "utf8mb4_general_ci"
         if "use_pure" not in config: