MindsDB: mindsdb-25.6.2.0-py3-none-any.whl → mindsdb-25.6.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (35)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +25 -4
  3. mindsdb/api/a2a/task_manager.py +68 -6
  4. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
  5. mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
  6. mindsdb/api/executor/utilities/sql.py +18 -19
  7. mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
  8. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
  9. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -1
  10. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  11. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +3 -0
  12. mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
  13. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +94 -8
  14. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +19 -1
  15. mindsdb/integrations/libs/api_handler.py +19 -1
  16. mindsdb/integrations/libs/base.py +86 -2
  17. mindsdb/interfaces/agents/agents_controller.py +32 -6
  18. mindsdb/interfaces/agents/constants.py +1 -0
  19. mindsdb/interfaces/agents/mindsdb_database_agent.py +27 -34
  20. mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -6
  21. mindsdb/interfaces/data_catalog/data_catalog_reader.py +4 -0
  22. mindsdb/interfaces/database/integrations.py +4 -2
  23. mindsdb/interfaces/knowledge_base/controller.py +29 -24
  24. mindsdb/interfaces/knowledge_base/evaluate.py +0 -3
  25. mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +17 -86
  26. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +28 -3
  27. mindsdb/interfaces/skills/skills_controller.py +0 -23
  28. mindsdb/interfaces/skills/sql_agent.py +9 -5
  29. mindsdb/interfaces/storage/db.py +20 -4
  30. mindsdb/utilities/config.py +5 -1
  31. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/METADATA +247 -247
  32. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/RECORD +35 -35
  33. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/WHEEL +0 -0
  34. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/licenses/LICENSE +0 -0
  35. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/top_level.txt +0 -0
--- a/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py
+++ b/mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py
@@ -1,26 +1,28 @@
-from typing import Text, Dict, Any
+from google.cloud.bigquery import Client, QueryJobConfig
 from google.api_core.exceptions import BadRequest
+import pandas as pd
 from sqlalchemy_bigquery.base import BigQueryDialect
-from google.cloud.bigquery import Client, QueryJobConfig
+from typing import Any, Dict, Optional, Text
 
 from mindsdb.utilities import log
 from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.integrations.libs.base import DatabaseHandler
+from mindsdb.integrations.libs.base import MetaDatabaseHandler
 from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
 from mindsdb.integrations.utilities.handlers.auth_utilities.google import GoogleServiceAccountOAuth2Manager
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
     HandlerResponse as Response,
-    RESPONSE_TYPE
+    RESPONSE_TYPE,
 )
 
 logger = log.getLogger(__name__)
 
 
-class BigQueryHandler(DatabaseHandler):
+class BigQueryHandler(MetaDatabaseHandler):
     """
     This handler handles connection and execution of Google BigQuery statements.
     """
+
     name = "bigquery"
 
     def __init__(self, name: Text, connection_data: Dict, **kwargs: Any):
@@ -49,19 +51,16 @@ class BigQueryHandler(DatabaseHandler):
             return self.connection
 
         # Mandatory connection parameters
-        if not all(key in self.connection_data for key in ['project_id', 'dataset']):
-            raise ValueError('Required parameters (project_id, dataset) must be provided.')
+        if not all(key in self.connection_data for key in ["project_id", "dataset"]):
+            raise ValueError("Required parameters (project_id, dataset) must be provided.")
 
         google_sa_oauth2_manager = GoogleServiceAccountOAuth2Manager(
-            credentials_file=self.connection_data.get('service_account_keys'),
-            credentials_json=self.connection_data.get('service_account_json')
+            credentials_file=self.connection_data.get("service_account_keys"),
+            credentials_json=self.connection_data.get("service_account_json"),
         )
         credentials = google_sa_oauth2_manager.get_oauth2_credentials()
 
-        client = Client(
-            project=self.connection_data["project_id"],
-            credentials=credentials
-        )
+        client = Client(project=self.connection_data["project_id"], credentials=credentials)
         self.is_connected = True
         self.connection = client
         return self.connection
@@ -86,14 +85,14 @@ class BigQueryHandler(DatabaseHandler):
 
         try:
             connection = self.connect()
-            connection.query('SELECT 1;')
+            connection.query("SELECT 1;")
 
             # Check if the dataset exists
-            connection.get_dataset(self.connection_data['dataset'])
+            connection.get_dataset(self.connection_data["dataset"])
 
             response.success = True
         except (BadRequest, ValueError) as e:
-            logger.error(f'Error connecting to BigQuery {self.connection_data["project_id"]}, {e}!')
+            logger.error(f"Error connecting to BigQuery {self.connection_data['project_id']}, {e}!")
             response.error_message = e
 
         if response.success is False and self.is_connected is True:
@@ -113,22 +112,18 @@ class BigQueryHandler(DatabaseHandler):
         """
         connection = self.connect()
         try:
-            job_config = QueryJobConfig(default_dataset=f"{self.connection_data['project_id']}.{self.connection_data['dataset']}")
+            job_config = QueryJobConfig(
+                default_dataset=f"{self.connection_data['project_id']}.{self.connection_data['dataset']}"
+            )
             query = connection.query(query, job_config=job_config)
             result = query.to_dataframe()
             if not result.empty:
-                response = Response(
-                    RESPONSE_TYPE.TABLE,
-                    result
-                )
+                response = Response(RESPONSE_TYPE.TABLE, result)
             else:
                 response = Response(RESPONSE_TYPE.OK)
         except Exception as e:
-            logger.error(f'Error running query: {query} on {self.connection_data["project_id"]}!')
-            response = Response(
-                RESPONSE_TYPE.ERROR,
-                error_message=str(e)
-            )
+            logger.error(f"Error running query: {query} on {self.connection_data['project_id']}!")
+            response = Response(RESPONSE_TYPE.ERROR, error_message=str(e))
         return response
 
     def query(self, query: ASTNode) -> Response:
@@ -154,7 +149,7 @@ class BigQueryHandler(DatabaseHandler):
         """
         query = f"""
             SELECT table_name, table_schema, table_type
-            FROM `{self.connection_data['project_id']}.{self.connection_data['dataset']}.INFORMATION_SCHEMA.TABLES`
+            FROM `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES`
             WHERE table_type IN ('BASE TABLE', 'VIEW')
         """
         result = self.native_query(query)
@@ -174,8 +169,204 @@ class BigQueryHandler(DatabaseHandler):
         """
         query = f"""
            SELECT column_name AS Field, data_type as Type
-            FROM `{self.connection_data['project_id']}.{self.connection_data['dataset']}.INFORMATION_SCHEMA.COLUMNS`
+            FROM `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS`
             WHERE table_name = '{table_name}'
         """
         result = self.native_query(query)
         return result
+
+    def meta_get_tables(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves table metadata for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve metadata information.
+
+        Returns:
+            Response: A response object containing the metadata information, formatted as per the `Response` class.
+        """
+        query = f"""
+            SELECT
+                t.table_name,
+                t.table_schema,
+                t.table_type,
+                st.row_count
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLES` AS t
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.__TABLES__` AS st
+            ON
+                t.table_name = st.table_id
+            WHERE
+                t.table_type IN ('BASE TABLE', 'VIEW')
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND t.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_columns(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves column metadata for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve column metadata.
+
+        Returns:
+            Response: A response object containing the column metadata.
+        """
+        query = f"""
+            SELECT
+                table_name,
+                column_name,
+                data_type,
+                column_default,
+                CASE is_nullable
+                    WHEN 'YES' THEN TRUE
+                    ELSE FALSE
+                END AS is_nullable
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.COLUMNS`
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" WHERE table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_column_statistics_for_table(self, table_name: str, columns: list) -> Response:
+        """
+        Retrieves statistics for the specified columns in a table.
+
+        Args:
+            table_name (str): The name of the table.
+            columns (list): A list of column names to retrieve statistics for.
+
+        Returns:
+            Response: A response object containing the column statistics.
+        """
+        # To avoid hitting BigQuery's query size limits, we will chunk the columns into batches.
+        # This is because the queries are combined using UNION ALL, which can lead to very large queries if there are many columns.
+        BATCH_SIZE = 20
+
+        def chunked(lst, n):
+            """
+            Yields successive n-sized chunks from lst.
+            """
+            for i in range(0, len(lst), n):
+                yield lst[i : i + n]
+
+        queries = []
+        for column_batch in chunked(columns, BATCH_SIZE):
+            batch_queries = []
+            for column in column_batch:
+                batch_queries.append(
+                    f"""
+                    SELECT
+                        '{table_name}' AS table_name,
+                        '{column}' AS column_name,
+                        SAFE_DIVIDE(COUNTIF({column} IS NULL), COUNT(*)) * 100 AS null_percentage,
+                        CAST(MIN(`{column}`) AS STRING) AS minimum_value,
+                        CAST(MAX(`{column}`) AS STRING) AS maximum_value,
+                        COUNT(DISTINCT {column}) AS distinct_values_count
+                    FROM
+                        `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.{table_name}`
+                    """
+                )
+
+            query = " UNION ALL ".join(batch_queries)
+            queries.append(query)
+
+        results = []
+        for query in queries:
+            try:
+                result = self.native_query(query)
+                if result.resp_type == RESPONSE_TYPE.TABLE:
+                    results.append(result.data_frame)
+                else:
+                    logger.error(f"Error retrieving column statistics for table {table_name}: {result.error_message}")
+            except Exception as e:
+                logger.error(f"Exception occurred while retrieving column statistics for table {table_name}: {e}")
+
+        if not results:
+            logger.warning(f"No column statistics could be retrieved for table {table_name}.")
+            return Response(
+                RESPONSE_TYPE.ERROR, error_message=f"No column statistics could be retrieved for table {table_name}."
+            )
+        return Response(RESPONSE_TYPE.TABLE, pd.concat(results, ignore_index=True) if results else pd.DataFrame())
+
+    def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves primary key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve primary key information.
+
+        Returns:
+            Response: A response object containing the primary key information.
+        """
+        query = f"""
+            SELECT
+                tc.table_name,
+                kcu.column_name,
+                kcu.ordinal_position,
+                tc.constraint_name,
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu
+            ON
+                tc.constraint_name = kcu.constraint_name
+            WHERE
+                tc.constraint_type = 'PRIMARY KEY'
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND tc.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_foreign_keys(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves foreign key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve foreign key information.
+
+        Returns:
+            Response: A response object containing the foreign key information.
+        """
+        query = f"""
+            SELECT
+                ccu.table_name AS parent_table_name,
+                ccu.column_name AS parent_column_name,
+                kcu.table_name AS child_table_name,
+                kcu.column_name AS child_column_name,
+                tc.constraint_name
+            FROM
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` AS tc
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` AS kcu
+            ON
+                tc.constraint_name = kcu.constraint_name
+            JOIN
+                `{self.connection_data["project_id"]}.{self.connection_data["dataset"]}.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE` AS ccu
+            ON
+                tc.constraint_name = ccu.constraint_name
+            WHERE
+                tc.constraint_type = 'FOREIGN KEY'
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND tc.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
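The new `meta_*` methods are the bulk of this release's BigQuery changes. As a rough illustration only, a caller might exercise them as sketched below; the handler instance name, project, dataset, table names, and credentials are hypothetical placeholders, while the method signatures and the `Response` fields (`resp_type`, `data_frame`) come from the diff above.

# Sketch of exercising the new metadata API; values marked "hypothetical"
# are illustrative placeholders, not part of the release.
from mindsdb.integrations.handlers.bigquery_handler.bigquery_handler import BigQueryHandler
from mindsdb.integrations.libs.response import RESPONSE_TYPE

handler = BigQueryHandler(
    "bigquery_datasource",  # hypothetical instance name
    connection_data={
        "project_id": "my-gcp-project",   # hypothetical
        "dataset": "my_dataset",          # hypothetical
        "service_account_json": "{...}",  # hypothetical service-account JSON
    },
)

# Table-level metadata; row counts come from the __TABLES__ join.
tables = handler.meta_get_tables(["orders", "customers"])  # hypothetical tables
if tables.resp_type == RESPONSE_TYPE.TABLE:
    print(tables.data_frame)

# Column statistics are built in UNION ALL batches of 20 columns, so wide
# tables issue several moderate queries rather than one oversized one.
stats = handler.meta_get_column_statistics_for_table("orders", ["id", "total"])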
--- a/mindsdb/integrations/handlers/lindorm_handler/requirements.txt
+++ b/mindsdb/integrations/handlers/lindorm_handler/requirements.txt
@@ -1,3 +1,3 @@
 pyphoenix
 phoenixdb
-protobuf==3.20.3
+protobuf==4.25.8
--- a/mindsdb/integrations/handlers/llama_index_handler/requirements.txt
+++ b/mindsdb/integrations/handlers/llama_index_handler/requirements.txt
@@ -1,4 +1,4 @@
-llama-index==0.12.21
+llama-index==0.12.28
 pydantic-settings >= 2.1.0
 llama-index-readers-web
 llama-index-embeddings-openai
--- a/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py
+++ b/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py
@@ -179,6 +179,9 @@ class MySQLHandler(DatabaseHandler):
                 config["ssl_cert"] = ssl_cert
             if ssl_key is not None:
                 config["ssl_key"] = ssl_key
+        elif ssl is False:
+            config["ssl_disabled"] = True
+
         if "collation" not in config:
             config["collation"] = "utf8mb4_general_ci"
         if "use_pure" not in config: