MindsDB: mindsdb-25.5.4.2-py3-none-any.whl → mindsdb-25.6.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (69):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/executor/command_executor.py +69 -14
  5. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  6. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  7. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  8. mindsdb/api/executor/planner/plan_join.py +67 -77
  9. mindsdb/api/executor/planner/query_planner.py +176 -155
  10. mindsdb/api/executor/planner/steps.py +37 -12
  11. mindsdb/api/executor/sql_query/result_set.py +45 -64
  12. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  14. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  15. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  16. mindsdb/api/executor/utilities/sql.py +42 -48
  17. mindsdb/api/http/namespaces/config.py +1 -1
  18. mindsdb/api/http/namespaces/file.py +14 -23
  19. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  22. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  23. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  24. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  25. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  27. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  28. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  29. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  30. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  32. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  33. mindsdb/integrations/libs/api_handler.py +261 -57
  34. mindsdb/integrations/libs/base.py +100 -29
  35. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  36. mindsdb/integrations/utilities/handler_utils.py +23 -8
  37. mindsdb/integrations/utilities/sql_utils.py +35 -40
  38. mindsdb/interfaces/agents/agents_controller.py +196 -192
  39. mindsdb/interfaces/agents/constants.py +7 -1
  40. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  41. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  42. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  43. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  44. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  46. mindsdb/interfaces/database/database.py +81 -57
  47. mindsdb/interfaces/database/integrations.py +220 -234
  48. mindsdb/interfaces/database/log.py +72 -104
  49. mindsdb/interfaces/database/projects.py +156 -193
  50. mindsdb/interfaces/file/file_controller.py +21 -65
  51. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  52. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  53. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  54. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  55. mindsdb/interfaces/skills/skills_controller.py +54 -36
  56. mindsdb/interfaces/skills/sql_agent.py +109 -86
  57. mindsdb/interfaces/storage/db.py +223 -79
  58. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  59. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  60. mindsdb/utilities/config.py +9 -2
  61. mindsdb/utilities/log.py +35 -26
  62. mindsdb/utilities/ml_task_queue/task.py +19 -22
  63. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  64. mindsdb/utilities/starters.py +40 -0
  65. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
  66. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
  67. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  68. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  69. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Any, List
1
+ from typing import Any, List, Optional
2
2
  import ast as py_ast
3
3
 
4
4
  import pandas as pd
@@ -6,26 +6,29 @@ from mindsdb_sql_parser.ast import ASTNode, Select, Insert, Update, Delete, Star
6
6
  from mindsdb_sql_parser.ast.select.identifier import Identifier
7
7
 
8
8
  from mindsdb.integrations.utilities.sql_utils import (
9
- extract_comparison_conditions, filter_dataframe, sort_dataframe,
10
- FilterCondition, FilterOperator, SortColumn
9
+ extract_comparison_conditions,
10
+ filter_dataframe,
11
+ sort_dataframe,
12
+ FilterCondition,
13
+ FilterOperator,
14
+ SortColumn,
11
15
  )
12
16
  from mindsdb.integrations.libs.base import BaseHandler
13
17
  from mindsdb.integrations.libs.api_handler_exceptions import TableAlreadyExists, TableNotFound
14
18
 
15
- from mindsdb.integrations.libs.response import (
16
- HandlerResponse as Response,
17
- RESPONSE_TYPE
18
- )
19
+ from mindsdb.integrations.libs.response import HandlerResponse as Response, RESPONSE_TYPE
20
+ from mindsdb.utilities import log
19
21
 
20
22
 
21
- class FuncParser:
23
+ logger = log.getLogger("mindsdb")
22
24
 
23
- def from_string(self, query_string):
24
25
 
25
- body = py_ast.parse(query_string.strip(), mode='eval').body
26
+ class FuncParser:
27
+ def from_string(self, query_string):
28
+ body = py_ast.parse(query_string.strip(), mode="eval").body
26
29
 
27
30
  if not isinstance(body, py_ast.Call):
28
- raise RuntimeError(f'Api function not found {query_string}')
31
+ raise RuntimeError(f"Api function not found {query_string}")
29
32
 
30
33
  fnc_name = body.func.id
31
34
 
@@ -39,7 +42,6 @@ class FuncParser:
39
42
  return fnc_name, params
40
43
 
41
44
  def process(self, node):
42
-
43
45
  if isinstance(node, py_ast.List):
44
46
  elements = []
45
47
  for node2 in node.elts:
@@ -47,7 +49,6 @@ class FuncParser:
47
49
  return elements
48
50
 
49
51
  if isinstance(node, py_ast.Dict):
50
-
51
52
  keys = []
52
53
  for node2 in node.keys:
53
54
  if isinstance(node2, py_ast.Constant):
@@ -55,7 +56,7 @@ class FuncParser:
55
56
  elif isinstance(node2, py_ast.Str): # py37
56
57
  value = node2.s
57
58
  else:
58
- raise NotImplementedError(f'Unknown dict key {node2}')
59
+ raise NotImplementedError(f"Unknown dict key {node2}")
59
60
 
60
61
  keys.append(value)
61
62
 
@@ -68,11 +69,11 @@ class FuncParser:
68
69
  if isinstance(node, py_ast.Name):
69
70
  # special attributes
70
71
  name = node.id
71
- if name == 'true':
72
+ if name == "true":
72
73
  return True
73
- elif name == 'false':
74
+ elif name == "false":
74
75
  return False
75
- elif name == 'null':
76
+ elif name == "null":
76
77
  return None
77
78
 
78
79
  if isinstance(node, py_ast.Constant):
@@ -92,11 +93,10 @@ class FuncParser:
92
93
  value = self.process(node.operand)
93
94
  return -value
94
95
 
95
- raise NotImplementedError(f'Unknown node {node}')
96
+ raise NotImplementedError(f"Unknown node {node}")
96
97
 
97
98
 
98
99
  class APITable:
99
-
100
100
  def __init__(self, handler):
101
101
  self.handler = handler
102
102
 
@@ -154,7 +154,6 @@ class APITable:
154
154
 
155
155
 
156
156
  class APIResource(APITable):
157
-
158
157
  def __init__(self, *args, table_name=None, **kwargs):
159
158
  self.table_name = table_name
160
159
  super().__init__(*args, **kwargs)
@@ -179,26 +178,18 @@ class APIResource(APITable):
179
178
  if query.order_by and len(query.order_by) > 0:
180
179
  sort = []
181
180
  for an_order in query.order_by:
182
- sort.append(SortColumn(an_order.field.parts[-1],
183
- an_order.direction.upper() != 'DESC'))
181
+ sort.append(SortColumn(an_order.field.parts[-1], an_order.direction.upper() != "DESC"))
184
182
 
185
183
  targets = []
186
184
  for col in query.targets:
187
185
  if isinstance(col, Identifier):
188
186
  targets.append(col.parts[-1])
189
187
 
190
- kwargs = {
191
- 'conditions': conditions,
192
- 'limit': limit,
193
- 'sort': sort,
194
- 'targets': targets
195
- }
188
+ kwargs = {"conditions": conditions, "limit": limit, "sort": sort, "targets": targets}
196
189
  if self.table_name is not None:
197
- kwargs['table_name'] = self.table_name
190
+ kwargs["table_name"] = self.table_name
198
191
 
199
- result = self.list(
200
- **kwargs
201
- )
192
+ result = self.list(**kwargs)
202
193
 
203
194
  filters = []
204
195
  for cond in conditions:
@@ -216,17 +207,18 @@ class APIResource(APITable):
216
207
  result = sort_dataframe(result, sort_columns)
217
208
 
218
209
  if limit is not None and len(result) > limit:
219
- result = result[:int(limit)]
210
+ result = result[: int(limit)]
220
211
 
221
212
  return result
222
213
 
223
- def list(self,
224
- conditions: List[FilterCondition] = None,
225
- limit: int = None,
226
- sort: List[SortColumn] = None,
227
- targets: List[str] = None,
228
- **kwargs
229
- ):
214
+ def list(
215
+ self,
216
+ conditions: List[FilterCondition] = None,
217
+ limit: int = None,
218
+ sort: List[SortColumn] = None,
219
+ targets: List[str] = None,
220
+ **kwargs,
221
+ ):
230
222
  """
231
223
  List items based on specified conditions, limits, sorting, and targets.
232
224
 
@@ -254,13 +246,10 @@ class APIResource(APITable):
254
246
 
255
247
  columns = [col.name for col in query.columns]
256
248
 
257
- data = [
258
- dict(zip(columns, a_row))
259
- for a_row in query.values
260
- ]
249
+ data = [dict(zip(columns, a_row)) for a_row in query.values]
261
250
  kwargs = {}
262
251
  if self.table_name is not None:
263
- kwargs['table_name'] = self.table_name
252
+ kwargs["table_name"] = self.table_name
264
253
 
265
254
  self.add(data, **kwargs)
266
255
 
@@ -332,10 +321,105 @@ class APIResource(APITable):
332
321
  raise NotImplementedError()
333
322
 
334
323
  def _extract_conditions(self, where: ASTNode) -> List[FilterCondition]:
335
- return [
336
- FilterCondition(i[1], FilterOperator(i[0].upper()), i[2])
337
- for i in extract_comparison_conditions(where)
338
- ]
324
+ return [FilterCondition(i[1], FilterOperator(i[0].upper()), i[2]) for i in extract_comparison_conditions(where)]
325
+
326
+
327
+ class MetaAPIResource(APIResource):
328
+ # TODO: Add a meta_table_info() method in case metadata cannot be retrieved as expected below?
329
+
330
+ def meta_get_tables(self, table_name: str, **kwargs) -> dict:
331
+ """
332
+ Retrieves table metadata for the API resource.
333
+
334
+ Args:
335
+ table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
336
+ kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
337
+
338
+ Returns:
339
+ Dict: The dictionary should contain the following fields:
340
+ - TABLE_NAME (str): Name of the table.
341
+ - TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional).
342
+ - TABLE_SCHEMA (str): Schema of the table (optional).
343
+ - TABLE_DESCRIPTION (str): Description of the table (optional).
344
+ - ROW_COUNT (int): Estimated number of rows in the table (optional).
345
+ """
346
+ pass
347
+
348
+ def meta_get_columns(self, table_name: str, **kwargs) -> List[dict]:
349
+ """
350
+ Retrieves column metadata for the API resource.
351
+
352
+ Args:
353
+ table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
354
+ kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
355
+
356
+ Returns:
357
+ List[dict]: The list should contain dictionaries with the following fields:
358
+ - TABLE_NAME (str): Name of the table.
359
+ - COLUMN_NAME (str): Name of the column.
360
+ - DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc.
361
+ - COLUMN_DESCRIPTION (str): Description of the column (optional).
362
+ - IS_NULLABLE (bool): Whether the column can contain NULL values (optional).
363
+ - COLUMN_DEFAULT (str): Default value of the column (optional).
364
+ """
365
+ pass
366
+
367
+ def meta_get_column_statistics(self, table_name: str, **kwargs) -> List[dict]:
368
+ """
369
+ Retrieves column statistics for the API resource.
370
+
371
+ Args:
372
+ table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
373
+ kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
374
+
375
+ Returns:
376
+ List[dict]: The list should contain dictionaries with the following fields:
377
+ - TABLE_NAME (str): Name of the table.
378
+ - COLUMN_NAME (str): Name of the column.
379
+ - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional).
380
+ - MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional).
381
+ - NULL_PERCENTAGE: Percentage of NULL values in the column (optional).
382
+ - MINIMUM_VALUE (str): Minimum value in the column (optional).
383
+ - MAXIMUM_VALUE (str): Maximum value in the column (optional).
384
+ - DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional).
385
+ """
386
+ pass
387
+
388
+ def meta_get_primary_keys(self, table_name: str, **kwargs) -> List[dict]:
389
+ """
390
+ Retrieves primary key metadata for the API resource.
391
+
392
+ Args:
393
+ table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
394
+ kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
395
+
396
+ Returns:
397
+ List[dict]: The list should contain dictionaries with the following fields:
398
+ - TABLE_NAME (str): Name of the table.
399
+ - COLUMN_NAME (str): Name of the column that is part of the primary key.
400
+ - ORDINAL_POSITION (int): Position of the column in the primary key (optional).
401
+ - CONSTRAINT_NAME (str): Name of the primary key constraint (optional).
402
+ """
403
+ pass
404
+
405
+ def meta_get_foreign_keys(self, table_name: str, all_tables: List[str], **kwargs) -> List[dict]:
406
+ """
407
+ Retrieves foreign key metadata for the API resource.
408
+
409
+ Args:
410
+ table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
411
+ all_tables (List[str]): A list of all table names in the API resource. This is used to identify relationships between tables.
412
+ kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
413
+
414
+ Returns:
415
+ List[dict]: The list should contain dictionaries with the following fields:
416
+ - PARENT_TABLE_NAME (str): Name of the parent table.
417
+ - PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key.
418
+ - CHILD_TABLE_NAME (str): Name of the child table.
419
+ - CHILD_COLUMN_NAME (str): Name of the child column that is part of the foreign key.
420
+ - CONSTRAINT_NAME (str): Name of the foreign key constraint (optional).
421
+ """
422
+ pass
339
423
 
340
424
 
341
425
  class APIHandler(BaseHandler):
@@ -368,14 +452,13 @@ class APIHandler(BaseHandler):
368
452
  """
369
453
  name = name.parts[-1]
370
454
  if name not in self._tables:
371
- raise TableNotFound(f'Table not found: {name}')
455
+ raise TableNotFound(f"Table not found: {name}")
372
456
  return self._tables[name]
373
457
 
374
458
  def query(self, query: ASTNode):
375
-
376
459
  if isinstance(query, Select):
377
460
  table = self._get_table(query.from_table)
378
- if not hasattr(table, 'list'):
461
+ if not hasattr(table, "list"):
379
462
  # for back compatibility, targets wasn't passed in previous version
380
463
  query.targets = [Star()]
381
464
  result = self._get_table(query.from_table).select(query)
@@ -406,8 +489,8 @@ class APIHandler(BaseHandler):
406
489
 
407
490
  result = self._get_table(Identifier(table_name)).get_columns()
408
491
 
409
- df = pd.DataFrame(result, columns=['Field'])
410
- df['Type'] = 'str'
492
+ df = pd.DataFrame(result, columns=["Field"])
493
+ df["Type"] = "str"
411
494
 
412
495
  return Response(RESPONSE_TYPE.TABLE, df)
413
496
 
@@ -419,14 +502,135 @@ class APIHandler(BaseHandler):
419
502
  """
420
503
  result = list(self._tables.keys())
421
504
 
422
- df = pd.DataFrame(result, columns=['table_name'])
423
- df['table_type'] = 'BASE TABLE'
505
+ df = pd.DataFrame(result, columns=["table_name"])
506
+ df["table_type"] = "BASE TABLE"
424
507
 
425
508
  return Response(RESPONSE_TYPE.TABLE, df)
426
509
 
427
510
 
428
- class APIChatHandler(APIHandler):
511
+ class MetaAPIHandler(APIHandler):
512
+ """
513
+ Base class for handlers associated to the applications APIs (e.g. twitter, slack, discord etc.)
429
514
 
515
+ This class is used when the handler is also needed to store information in the data catalog.
516
+ """
517
+
518
+ def meta_get_tables(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
519
+ """
520
+ Retrieves metadata for the specified tables (or all tables if no list is provided).
521
+
522
+ Args:
523
+ table_names (List): A list of table names for which to retrieve metadata.
524
+ kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
525
+
526
+ Returns:
527
+ Response: A response object containing the table metadata.
528
+ """
529
+ df = pd.DataFrame()
530
+ for table_name, table_class in self._tables.items():
531
+ if table_names is None or table_name in table_names:
532
+ try:
533
+ if hasattr(table_class, "meta_get_tables"):
534
+ table_metadata = table_class.meta_get_tables(table_name, **kwargs)
535
+ df = pd.concat([df, pd.DataFrame([table_metadata])], ignore_index=True)
536
+ except Exception as e:
537
+ logger.error(f"Error retrieving metadata for table {table_name}: {e}")
538
+
539
+ return Response(RESPONSE_TYPE.TABLE, df)
540
+
541
+ def meta_get_columns(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
542
+ """
543
+ Retrieves column metadata for the specified tables (or all tables if no list is provided).
544
+
545
+ Args:
546
+ table_names (List): A list of table names for which to retrieve column metadata.
547
+
548
+ Returns:
549
+ Response: A response object containing the column metadata.
550
+ """
551
+ df = pd.DataFrame()
552
+ for table_name, table_class in self._tables.items():
553
+ if table_names is None or table_name in table_names:
554
+ try:
555
+ if hasattr(table_class, "meta_get_columns"):
556
+ column_metadata = table_class.meta_get_columns(table_name, **kwargs)
557
+ df = pd.concat([df, pd.DataFrame(column_metadata)], ignore_index=True)
558
+ except Exception as e:
559
+ logger.error(f"Error retrieving column metadata for table {table_name}: {e}")
560
+
561
+ return Response(RESPONSE_TYPE.TABLE, df)
562
+
563
+ def meta_get_column_statistics(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
564
+ """
565
+ Retrieves column statistics for the specified tables (or all tables if no list is provided).
566
+
567
+ Args:
568
+ table_names (List): A list of table names for which to retrieve column statistics.
569
+
570
+ Returns:
571
+ Response: A response object containing the column statistics.
572
+ """
573
+ df = pd.DataFrame()
574
+ for table_name, table_class in self._tables.items():
575
+ if table_names is None or table_name in table_names:
576
+ try:
577
+ if hasattr(table_class, "meta_get_column_statistics"):
578
+ column_statistics = table_class.meta_get_column_statistics(table_name, **kwargs)
579
+ df = pd.concat([df, pd.DataFrame(column_statistics)], ignore_index=True)
580
+ except Exception as e:
581
+ logger.error(f"Error retrieving column statistics for table {table_name}: {e}")
582
+
583
+ return Response(RESPONSE_TYPE.TABLE, df)
584
+
585
+ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
586
+ """
587
+ Retrieves primary key metadata for the specified tables (or all tables if no list is provided).
588
+
589
+ Args:
590
+ table_names (List): A list of table names for which to retrieve primary key metadata.
591
+
592
+ Returns:
593
+ Response: A response object containing the primary key metadata.
594
+ """
595
+ df = pd.DataFrame()
596
+ for table_name, table_class in self._tables.items():
597
+ if table_names is None or table_name in table_names:
598
+ try:
599
+ if hasattr(table_class, "meta_get_primary_keys"):
600
+ primary_key_metadata = table_class.meta_get_primary_keys(table_name, **kwargs)
601
+ df = pd.concat([df, pd.DataFrame(primary_key_metadata)], ignore_index=True)
602
+ except Exception as e:
603
+ logger.error(f"Error retrieving primary keys for table {table_name}: {e}")
604
+
605
+ return Response(RESPONSE_TYPE.TABLE, df)
606
+
607
+ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
608
+ """
609
+ Retrieves foreign key metadata for the specified tables (or all tables if no list is provided).
610
+
611
+ Args:
612
+ table_names (List): A list of table names for which to retrieve foreign key metadata.
613
+
614
+ Returns:
615
+ Response: A response object containing the foreign key metadata.
616
+ """
617
+ df = pd.DataFrame()
618
+ all_tables = list(self._tables.keys())
619
+ for table_name, table_class in self._tables.items():
620
+ if table_names is None or table_name in table_names:
621
+ try:
622
+ if hasattr(table_class, "meta_get_foreign_keys"):
623
+ foreign_key_metadata = table_class.meta_get_foreign_keys(
624
+ table_name, all_tables=table_names if table_names else all_tables, **kwargs
625
+ )
626
+ df = pd.concat([df, pd.DataFrame(foreign_key_metadata)], ignore_index=True)
627
+ except Exception as e:
628
+ logger.error(f"Error retrieving foreign keys for table {table_name}: {e}")
629
+
630
+ return Response(RESPONSE_TYPE.TABLE, df)
631
+
632
+
633
+ class APIChatHandler(APIHandler):
430
634
  def get_chat_config(self):
431
635
  """Return configuration to connect to chatbot
432
636
 
@@ -14,14 +14,14 @@ logger = log.getLogger(__name__)
14
14
 
15
15
 
16
16
  class BaseHandler:
17
- """ Base class for database handlers
17
+ """Base class for database handlers
18
18
 
19
19
  Base class for handlers that associate a source of information with the
20
20
  broader MindsDB ecosystem via SQL commands.
21
21
  """
22
22
 
23
23
  def __init__(self, name: str):
24
- """ constructor
24
+ """constructor
25
25
  Args:
26
26
  name (str): the handler name
27
27
  """
@@ -29,7 +29,7 @@ class BaseHandler:
29
29
  self.name = name
30
30
 
31
31
  def connect(self):
32
- """ Set up any connections required by the handler
32
+ """Set up any connections required by the handler
33
33
 
34
34
  Should return connection
35
35
 
@@ -37,7 +37,7 @@ class BaseHandler:
37
37
  raise NotImplementedError()
38
38
 
39
39
  def disconnect(self):
40
- """ Close any existing connections
40
+ """Close any existing connections
41
41
 
42
42
  Should switch self.is_connected.
43
43
  """
@@ -45,7 +45,7 @@ class BaseHandler:
45
45
  return
46
46
 
47
47
  def check_connection(self) -> HandlerStatusResponse:
48
- """ Check connection to the handler
48
+ """Check connection to the handler
49
49
 
50
50
  Returns:
51
51
  HandlerStatusResponse
@@ -77,7 +77,7 @@ class BaseHandler:
77
77
  raise NotImplementedError()
78
78
 
79
79
  def get_tables(self) -> HandlerResponse:
80
- """ Return list of entities
80
+ """Return list of entities
81
81
 
82
82
  Return list of entities that will be accesible as tables.
83
83
 
@@ -89,7 +89,7 @@ class BaseHandler:
89
89
  raise NotImplementedError()
90
90
 
91
91
  def get_columns(self, table_name: str) -> HandlerResponse:
92
- """ Returns a list of entity columns
92
+ """Returns a list of entity columns
93
93
 
94
94
  Args:
95
95
  table_name (str): name of one of tables returned by self.get_tables()
@@ -113,6 +113,91 @@ class DatabaseHandler(BaseHandler):
113
113
  super().__init__(name)
114
114
 
115
115
 
116
+ class MetaDatabaseHandler(DatabaseHandler):
117
+ """
118
+ Base class for handlers associated to data storage systems (e.g. databases, data warehouses, streaming services, etc.)
119
+
120
+ This class is used when the handler is also needed to store information in the data catalog.
121
+ This information is typically avaiable in the information schema or system tables of the database.
122
+ """
123
+
124
+ def __init__(self, name: str):
125
+ super().__init__(name)
126
+
127
+ def meta_get_tables(self, table_names: Optional[List[str]]) -> HandlerResponse:
128
+ """
129
+ Returns metadata information about the tables to be stored in the data catalog.
130
+
131
+ Returns:
132
+ HandlerResponse: The response should consist of the following columns:
133
+ - TABLE_NAME (str): Name of the table.
134
+ - TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional).
135
+ - TABLE_SCHEMA (str): Schema of the table (optional).
136
+ - TABLE_DESCRIPTION (str): Description of the table (optional).
137
+ - ROW_COUNT (int): Estimated number of rows in the table (optional).
138
+ """
139
+ raise NotImplementedError()
140
+
141
+ def meta_get_columns(self, table_names: Optional[List[str]]) -> HandlerResponse:
142
+ """
143
+ Returns metadata information about the columns in the tables to be stored in the data catalog.
144
+
145
+ Returns:
146
+ HandlerResponse: The response should consist of the following columns:
147
+ - TABLE_NAME (str): Name of the table.
148
+ - COLUMN_NAME (str): Name of the column.
149
+ - DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc.
150
+ - COLUMN_DESCRIPTION (str): Description of the column (optional).
151
+ - IS_NULLABLE (bool): Whether the column can contain NULL values (optional).
152
+ - COLUMN_DEFAULT (str): Default value of the column (optional).
153
+ """
154
+ raise NotImplementedError()
155
+
156
+ def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> HandlerResponse:
157
+ """
158
+ Returns metadata statisical information about the columns in the tables to be stored in the data catalog.
159
+
160
+ Returns:
161
+ HandlerResponse: The response should consist of the following columns:
162
+ - TABLE_NAME (str): Name of the table.
163
+ - COLUMN_NAME (str): Name of the column.
164
+ - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional).
165
+ - MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional).
166
+ - NULL_PERCENTAGE: Percentage of NULL values in the column (optional).
167
+ - MINIMUM_VALUE (str): Minimum value in the column (optional).
168
+ - MAXIMUM_VALUE (str): Maximum value in the column (optional).
169
+ - DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional).
170
+ """
171
+ raise NotImplementedError()
172
+
173
+ def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> HandlerResponse:
174
+ """
175
+ Returns metadata information about the primary keys in the tables to be stored in the data catalog.
176
+
177
+ Returns:
178
+ HandlerResponse: The response should consist of the following columns:
179
+ - TABLE_NAME (str): Name of the table.
180
+ - COLUMN_NAME (str): Name of the column that is part of the primary key.
181
+ - ORDINAL_POSITION (int): Position of the column in the primary key (optional).
182
+ - CONSTRAINT_NAME (str): Name of the primary key constraint (optional).
183
+ """
184
+ raise NotImplementedError()
185
+
186
+ def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> HandlerResponse:
187
+ """
188
+ Returns metadata information about the foreign keys in the tables to be stored in the data catalog.
189
+
190
+ Returns:
191
+ HandlerResponse: The response should consist of the following columns:
192
+ - PARENT_TABLE_NAME (str): Name of the parent table.
193
+ - PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key.
194
+ - CHILD_TABLE_NAME (str): Name of the child table.
195
+ - CHILD_COLUMN_NAME (str): Name of the child column that is part of the foreign key.
196
+ - CONSTRAINT_NAME (str): Name of the foreign key constraint (optional).
197
+ """
198
+ raise NotImplementedError()
199
+
200
+
116
201
  class ArgProbeMixin:
117
202
  """
118
203
  A mixin class that provides probing of arguments that
@@ -154,26 +239,16 @@ class ArgProbeMixin:
154
239
  self.visit(node.value)
155
240
 
156
241
  def visit_Subscript(self, node):
157
- if (
158
- isinstance(node.value, ast.Name)
159
- and node.value.id in self.var_names_to_track
160
- ):
161
- if isinstance(node.slice, ast.Index) and isinstance(
162
- node.slice.value, ast.Str
163
- ):
242
+ if isinstance(node.value, ast.Name) and node.value.id in self.var_names_to_track:
243
+ if isinstance(node.slice, ast.Index) and isinstance(node.slice.value, ast.Str):
164
244
  self.arg_keys.append({"name": node.slice.value.s, "required": True})
165
245
  self.generic_visit(node)
166
246
 
167
247
  def visit_Call(self, node):
168
248
  if isinstance(node.func, ast.Attribute) and node.func.attr == "get":
169
- if (
170
- isinstance(node.func.value, ast.Name)
171
- and node.func.value.id in self.var_names_to_track
172
- ):
249
+ if isinstance(node.func.value, ast.Name) and node.func.value.id in self.var_names_to_track:
173
250
  if isinstance(node.args[0], ast.Str):
174
- self.arg_keys.append(
175
- {"name": node.args[0].s, "required": False}
176
- )
251
+ self.arg_keys.append({"name": node.args[0].s, "required": False})
177
252
  self.generic_visit(node)
178
253
 
179
254
  @classmethod
@@ -197,9 +272,7 @@ class ArgProbeMixin:
197
272
  try:
198
273
  source_code = self.get_source_code(method_name)
199
274
  except Exception as e:
200
- logger.error(
201
- f"Failed to get source code of method {method_name} in {self.__class__.__name__}. Reason: {e}"
202
- )
275
+ logger.error(f"Failed to get source code of method {method_name} in {self.__class__.__name__}. Reason: {e}")
203
276
  return []
204
277
 
205
278
  # parse the source code
@@ -238,9 +311,7 @@ class ArgProbeMixin:
238
311
  """
239
312
  method = getattr(self, method_name)
240
313
  if method is None:
241
- raise Exception(
242
- f"Method {method_name} does not exist in {self.__class__.__name__}"
243
- )
314
+ raise Exception(f"Method {method_name} does not exist in {self.__class__.__name__}")
244
315
  source_code = inspect.getsource(method)
245
316
  return source_code
246
317
 
@@ -288,8 +359,8 @@ class BaseMLEngine(ArgProbeMixin):
288
359
  self.engine_storage = engine_storage
289
360
  self.generative = False # if True, the target column name does not have to be specified at creation time
290
361
 
291
- if kwargs.get('base_model_storage'):
292
- self.base_model_storage = kwargs['base_model_storage'] # available when updating a model
362
+ if kwargs.get("base_model_storage"):
363
+ self.base_model_storage = kwargs["base_model_storage"] # available when updating a model
293
364
  else:
294
365
  self.base_model_storage = None
295
366