MindsDB 25.6.3.1__py3-none-any.whl → 25.7.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (55)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +8 -6
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +72 -44
  4. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +14 -1
  5. mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
  6. mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
  7. mindsdb/api/executor/planner/plan_join.py +1 -1
  8. mindsdb/api/executor/planner/query_planner.py +7 -1
  9. mindsdb/api/executor/planner/query_prepare.py +68 -87
  10. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  11. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  12. mindsdb/api/http/namespaces/file.py +49 -24
  13. mindsdb/api/mcp/start.py +45 -31
  14. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  15. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  16. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  17. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  18. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  19. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  20. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  21. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  22. mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
  24. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  25. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
  26. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  27. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  28. mindsdb/integrations/libs/api_handler.py +6 -7
  29. mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
  30. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  31. mindsdb/interfaces/agents/agents_controller.py +29 -9
  32. mindsdb/interfaces/agents/constants.py +44 -0
  33. mindsdb/interfaces/agents/langchain_agent.py +15 -6
  34. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  35. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  36. mindsdb/interfaces/data_catalog/data_catalog_reader.py +22 -3
  37. mindsdb/interfaces/knowledge_base/controller.py +121 -102
  38. mindsdb/interfaces/knowledge_base/evaluate.py +19 -7
  39. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  40. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  41. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  42. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  43. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +26 -22
  44. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
  45. mindsdb/interfaces/skills/skill_tool.py +91 -88
  46. mindsdb/interfaces/skills/sql_agent.py +181 -130
  47. mindsdb/interfaces/storage/db.py +9 -7
  48. mindsdb/utilities/config.py +12 -1
  49. mindsdb/utilities/exception.py +47 -7
  50. mindsdb/utilities/security.py +54 -11
  51. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +239 -251
  52. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +55 -54
  53. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
  54. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
  55. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
  __title__ = "MindsDB"
  __package_name__ = "mindsdb"
- __version__ = "25.6.3.1"
+ __version__ = "25.7.1.0"
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
  __email__ = "jorge@mindsdb.com"
  __author__ = "MindsDB Inc"
mindsdb/api/executor/command_executor.py CHANGED
@@ -36,6 +36,8 @@ from mindsdb_sql_parser.ast import (
      Tuple,
      Function,
      Variable,
+     Intersect,
+     Except,
  )

  # typed models
@@ -580,9 +582,6 @@ class ExecuteCommands:
          return ret
      query = SQLQuery(statement, session=self.session, database=database_name)
      return self.answer_select(query)
- elif statement_type is Union:
-     query = SQLQuery(statement, session=self.session, database=database_name)
-     return self.answer_select(query)
  elif statement_type is Explain:
      return self.answer_show_columns(statement.target, database_name=database_name)
  elif statement_type is CreateTable:
@@ -627,6 +626,9 @@ class ExecuteCommands:
      return self.answer_create_kb_index(statement, database_name)
  elif statement_type is EvaluateKnowledgeBase:
      return self.answer_evaluate_kb(statement, database_name)
+ elif statement_type in (Union, Intersect, Except):
+     query = SQLQuery(statement, session=self.session, database=database_name)
+     return self.answer_select(query)
  else:
      logger.warning(f"Unknown SQL statement: {sql}")
      raise NotSupportedYet(f"Unknown SQL statement: {sql}")
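Behavior note on the hunks above: the old Union-only branch is removed and replaced by a single membership test, so INTERSECT and EXCEPT now reach answer_select instead of falling through to NotSupportedYet. A minimal sketch of the new routing, assuming the mindsdb_sql_parser version pinned by this wheel parses all three set operations (the sample queries are illustrative):

```python
# Illustrative sketch only: mirrors the executor's membership test,
# not the ExecuteCommands API itself.
from mindsdb_sql_parser import parse_sql
from mindsdb_sql_parser.ast import Union, Intersect, Except

for sql in (
    "SELECT a FROM t1 UNION SELECT a FROM t2",
    "SELECT a FROM t1 INTERSECT SELECT a FROM t2",
    "SELECT a FROM t1 EXCEPT SELECT a FROM t2",
):
    statement = parse_sql(sql)
    # All three set operations now share the SQLQuery/answer_select path.
    if type(statement) in (Union, Intersect, Except):
        print(type(statement).__name__, "-> answer_select")
```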
@@ -1554,9 +1556,9 @@ class ExecuteCommands:
  if is_full:
      targets.extend(
          [
-             Constant("COLLATION", alias=Identifier("Collation")),
-             Constant("PRIVILEGES", alias=Identifier("Privileges")),
-             Constant("COMMENT", alias=Identifier("Comment")),
+             Constant(None, alias=Identifier("Collation")),
+             Constant("select", alias=Identifier("Privileges")),
+             Constant(None, alias=Identifier("Comment")),
          ]
      )
  new_statement = Select(
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py CHANGED
@@ -15,12 +15,38 @@ from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
  from mindsdb.utilities import log

  from .system_tables import (
-     SchemataTable, TablesTable, ColumnsTable, EventsTable, RoutinesTable,
-     PluginsTable, EnginesTable, KeyColumnUsageTable, StatisticsTable,
-     CharacterSetsTable, CollationsTable)
+     SchemataTable,
+     TablesTable,
+     MetaTablesTable,
+     ColumnsTable,
+     MetaColumnsTable,
+     EventsTable,
+     RoutinesTable,
+     PluginsTable,
+     EnginesTable,
+     MetaTableConstraintsTable,
+     KeyColumnUsageTable,
+     MetaColumnUsageTable,
+     StatisticsTable,
+     MetaColumnStatisticsTable,
+     CharacterSetsTable,
+     CollationsTable,
+     MetaHandlerInfoTable,
+ )
  from .mindsdb_tables import (
-     ModelsTable, DatabasesTable, MLEnginesTable, HandlersTable, JobsTable, QueriesTable,
-     ChatbotsTable, KBTable, SkillsTable, AgentsTable, ViewsTable, TriggersTable)
+     ModelsTable,
+     DatabasesTable,
+     MLEnginesTable,
+     HandlersTable,
+     JobsTable,
+     QueriesTable,
+     ChatbotsTable,
+     KBTable,
+     SkillsTable,
+     AgentsTable,
+     ViewsTable,
+     TriggersTable,
+ )

  from mindsdb.api.executor.datahub.classes.tables_row import TablesRow

@@ -32,12 +58,35 @@ class InformationSchemaDataNode(DataNode):
  type = "INFORMATION_SCHEMA"

  tables_list = [
-     SchemataTable, TablesTable, ColumnsTable, EventsTable, RoutinesTable,
-     PluginsTable, EnginesTable, KeyColumnUsageTable, StatisticsTable,
-     CharacterSetsTable, CollationsTable,
-     ModelsTable, DatabasesTable, MLEnginesTable, HandlersTable, JobsTable,
-     ChatbotsTable, KBTable, SkillsTable, AgentsTable, ViewsTable, TriggersTable,
-     QueriesTable
+     SchemataTable,
+     TablesTable,
+     MetaTablesTable,
+     ColumnsTable,
+     MetaColumnsTable,
+     EventsTable,
+     RoutinesTable,
+     PluginsTable,
+     EnginesTable,
+     MetaTableConstraintsTable,
+     KeyColumnUsageTable,
+     MetaColumnUsageTable,
+     StatisticsTable,
+     MetaColumnStatisticsTable,
+     CharacterSetsTable,
+     CollationsTable,
+     ModelsTable,
+     DatabasesTable,
+     MLEnginesTable,
+     HandlersTable,
+     JobsTable,
+     ChatbotsTable,
+     KBTable,
+     SkillsTable,
+     AgentsTable,
+     ViewsTable,
+     TriggersTable,
+     QueriesTable,
+     MetaHandlerInfoTable,
  ]

  def __init__(self, session):
@@ -46,9 +95,7 @@ class InformationSchemaDataNode(DataNode):
  self.project_controller = ProjectController()
  self.database_controller = session.database_controller

- self.persis_datanodes = {
-     'log': self.database_controller.logs_db_controller
- }
+ self.persis_datanodes = {"log": self.database_controller.logs_db_controller}

  databases = self.database_controller.get_dict()
  if "files" in databases:
@@ -69,15 +116,13 @@ class InformationSchemaDataNode(DataNode):
  if name_lower == "information_schema":
      return self

- if name_lower == 'log':
-     return self.database_controller.get_system_db('log')
+ if name_lower == "log":
+     return self.database_controller.get_system_db("log")

  if name_lower in self.persis_datanodes:
      return self.persis_datanodes[name_lower]

- existing_databases_meta = (
-     self.database_controller.get_dict()
- ) # filter_type='project'
+ existing_databases_meta = self.database_controller.get_dict() # filter_type='project'
  database_name = None
  for key in existing_databases_meta:
      if key.lower() == name_lower:
@@ -130,11 +175,9 @@ class InformationSchemaDataNode(DataNode):
  """
  table_name = table_name.upper()
  if table_name not in self.tables:
-     raise exc.TableNotExistError(
-         f"Table information_schema.{table_name} does not exists"
-     )
+     raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists")
  table_columns_names = self.tables[table_name].columns
- df = pd.DataFrame([[table_columns_names]], columns=[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME])
+ df = pd.DataFrame(pd.Series(table_columns_names, name=INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME))
  for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):
      if column_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME:
          continue
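The one-line DataFrame change in the hunk above is a shape fix: the old call wrapped the whole list of column names into a single cell of a 1x1 frame, while the new call yields one row per column name. A standalone illustration of the difference (the sample names are placeholders):

```python
import pandas as pd

names = ["TABLE_NAME", "TABLE_TYPE", "ROW_COUNT"]  # placeholder column names

# Old construction: a 1x1 frame whose only cell holds the entire list.
old = pd.DataFrame([[names]], columns=["COLUMN_NAME"])
print(old.shape)  # (1, 1)

# New construction: a 3x1 frame with one row per column name.
new = pd.DataFrame(pd.Series(names, name="COLUMN_NAME"))
print(new.shape)  # (3, 1)
```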
@@ -153,9 +196,7 @@ class InformationSchemaDataNode(DataNode):
  """
  table_name = table_name.upper()
  if table_name not in self.tables:
-     raise exc.TableNotExistError(
-         f"Table information_schema.{table_name} does not exists"
-     )
+     raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists")
  return self.tables[table_name].columns

  def get_integrations_names(self):
@@ -168,25 +209,16 @@ class InformationSchemaDataNode(DataNode):
  return [x.lower() for x in projects]

  def get_tables(self):
-     return [
-         TablesRow(TABLE_NAME=name)
-         for name in self.tables.keys()
-     ]
+     return [TablesRow(TABLE_NAME=name) for name in self.tables.keys()]

  def get_tree_tables(self):
-     return {
-         name: table
-         for name, table in self.tables.items()
-         if table.visible
-     }
+     return {name: table for name, table in self.tables.items() if table.visible}

  def query(self, query: ASTNode, session=None) -> DataHubResponse:
      query_tables = [x[1] for x in get_query_tables(query)]

      if len(query_tables) != 1:
-         raise exc.BadTableError(
-             f"Only one table can be used in query to information_schema: {query}"
-         )
+         raise exc.BadTableError(f"Only one table can be used in query to information_schema: {query}")

      table_name = query_tables[0].upper()

@@ -195,7 +227,7 @@

  tbl = self.tables[table_name]

- if hasattr(tbl, 'get_data'):
+ if hasattr(tbl, "get_data"):
      dataframe = tbl.get_data(query=query, inf_schema=self, session=self.session)
  else:
      dataframe = self._get_empty_table(tbl)
@@ -203,11 +235,7 @@

  columns_info = [{"name": k, "type": v} for k, v in data.dtypes.items()]

- return DataHubResponse(
-     data_frame=data,
-     columns=columns_info,
-     affected_rows=0
- )
+ return DataHubResponse(data_frame=data, columns=columns_info, affected_rows=0)

  def _get_empty_table(self, table):
      columns = table.columns
mindsdb/api/executor/datahub/datanodes/integration_datanode.py CHANGED
@@ -20,6 +20,7 @@ from mindsdb.integrations.utilities.utils import get_class_name
  from mindsdb.metrics import metrics
  from mindsdb.utilities import log
  from mindsdb.utilities.profiler import profiler
+ from mindsdb.utilities.exception import format_db_error_message
  from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type

  logger = log.getLogger(__name__)
@@ -240,7 +241,19 @@ class IntegrationDataNode(DataNode):
  raise DBHandlerException(msg) from e

  if result.type == RESPONSE_TYPE.ERROR:
-     raise Exception(f"Error in {self.integration_name}: {result.error_message}")
+     failed_sql_query = native_query
+     if query is not None:
+         failed_sql_query = query.to_string()
+
+     raise Exception(
+         format_db_error_message(
+             db_name=self.integration_handler.name,
+             db_type=self.integration_handler.__class__.name,
+             db_error_msg=result.error_message,
+             failed_query=failed_sql_query,
+         )
+     )
+
  if result.type == RESPONSE_TYPE.OK:
      return DataHubResponse(affected_rows=result.affected_rows)

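The helper is imported from mindsdb/utilities/exception.py, which also changed in this release (+47 -7) but is not shown in this hunk. Based only on the call site above, a hypothetical sketch of what such a formatter could look like (the message layout is an assumption, not the shipped implementation):

```python
# Hypothetical sketch: the parameter names follow the call site above; the
# actual implementation lives in mindsdb/utilities/exception.py and may differ.
def format_db_error_message(db_name: str, db_type: str, db_error_msg: str, failed_query: str) -> str:
    """Build a user-facing error naming the integration and echoing the failed SQL."""
    return (
        f"Error in {db_type} database '{db_name}': {db_error_msg}\n"
        f"Failed query:\n{failed_query}"
    )
```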
mindsdb/api/executor/datahub/datanodes/project_datanode.py CHANGED
@@ -154,7 +154,7 @@ class ProjectDataNode(DataNode):

      return DataHubResponse(data_frame=df, columns=columns_info)

-     raise EntityNotExistsError(f"Can't select from <{query_table}> in project")
+     raise EntityNotExistsError(f"Table '{query_table}' not found in database", self.project.name)
  else:
      raise NotImplementedError(f"Query not supported {query}")

mindsdb/api/executor/datahub/datanodes/system_tables.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Optional, Literal
+ from typing import Optional, List, Literal
  from dataclasses import dataclass, fields

  import pandas as pd
@@ -8,6 +8,7 @@ from mindsdb.utilities import log
  from mindsdb.utilities.config import config
  from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
  from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
+ from mindsdb.interfaces.data_catalog.data_catalog_reader import DataCatalogReader
  from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE, MYSQL_DATA_TYPE_COLUMNS_DEFAULT
  from mindsdb.api.executor.datahub.classes.tables_row import TABLES_ROW_TYPE, TablesRow

@@ -503,3 +504,315 @@ class CollationsTable(Table):

      df = pd.DataFrame(data, columns=cls.columns)
      return df
+
+
+ # Data Catalog tables
+ # TODO: Should these be placed in a separate schema?
+
+
+ def _get_records_from_data_catalog(databases: List, tables: Optional[List[str]] = None) -> List:
+     """Get records from the data catalog based on the specified databases and tables."""
+     # TODO: Should we allow to query all databases?
+     if not databases:
+         raise ValueError("At least one database must be specified in the query.")
+
+     records = []
+     for database in databases:
+         data_catalog_reader = DataCatalogReader(database_name=database, table_names=tables)
+         records.extend(data_catalog_reader.read_metadata_as_records())
+
+     return records
+
+
+ # TODO: Combine with existing 'TablesTable'?
+ class MetaTablesTable(Table):
+     name = "META_TABLES"
+
+     columns = ["TABLE_CATALOG", "TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE", "TABLE_DESCRIPTION", "ROW_COUNT"]
+
+     @classmethod
+     def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+         databases, _ = _get_scope(query)
+
+         records = _get_records_from_data_catalog(databases)
+
+         data = []
+         for record in records:
+             item = {
+                 "TABLE_CATALOG": "def",
+                 "TABLE_SCHEMA": record.integration.name,
+                 "TABLE_NAME": record.name,
+                 "TABLE_TYPE": record.type,
+                 "TABLE_DESCRIPTION": record.description or "",
+                 "ROW_COUNT": record.row_count,
+             }
+             data.append(item)
+
+         df = pd.DataFrame(data, columns=cls.columns)
+         return df
+
+
+ # TODO: Combine with existing 'ColumnsTable'?
+ class MetaColumnsTable(Table):
+     name = "META_COLUMNS"
+
+     columns = [
+         "TABLE_CATALOG",
+         "TABLE_SCHEMA",
+         "TABLE_NAME",
+         "COLUMN_NAME",
+         "DATA_TYPE",
+         "COLUMN_DESCRIPTION",
+         "COLUMN_DEFAULT",
+         "IS_NULLABLE",
+     ]
+
+     @classmethod
+     def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+         databases, tables = _get_scope(query)
+
+         records = _get_records_from_data_catalog(databases, tables)
+
+         data = []
+         for record in records:
+             database_name = record.integration.name
+             table_name = record.name
+             columns = record.meta_columns
+
+             for column in columns:
+                 item = {
+                     "TABLE_CATALOG": "def",
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "COLUMN_NAME": column.name,
+                     "DATA_TYPE": column.data_type,
+                     "COLUMN_DESCRIPTION": column.description or "",
+                     "COLUMN_DEFAULT": column.default_value,
+                     "IS_NULLABLE": "YES" if column.is_nullable else "NO",
+                 }
+                 data.append(item)
+
+         df = pd.DataFrame(data, columns=cls.columns)
+         return df
+
+
+ class MetaColumnStatisticsTable(Table):
+     name = "META_COLUMN_STATISTICS"
+     columns = [
+         "TABLE_SCHEMA",
+         "TABLE_NAME",
+         "COLUMN_NAME",
+         "MOST_COMMON_VALS",
+         "MOST_COMMON_FREQS",
+         "NULL_FRAC",
+         "N_DISTINCT",
+         "MIN_VALUE",
+         "MAX_VALUE",
+     ]
+
+     @classmethod
+     def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+         databases, tables = _get_scope(query)
+
+         records = _get_records_from_data_catalog(databases, tables)
+
+         data = []
+         for record in records:
+             database_name = record.integration.name
+             table_name = record.name
+             columns = record.meta_columns
+
+             for column in columns:
+                 column_statistics = column.meta_column_statistics[0] if column.meta_column_statistics else None
+
+                 item = {
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "COLUMN_NAME": column.name,
+                 }
+
+                 if column_statistics:
+                     item.update(
+                         {
+                             "MOST_COMMON_VALS": column_statistics.most_common_values,
+                             "MOST_COMMON_FREQS": column_statistics.most_common_frequencies,
+                             "NULL_FRAC": column_statistics.null_percentage,
+                             "N_DISTINCT": column_statistics.distinct_values_count,
+                             "MIN_VALUE": column_statistics.minimum_value,
+                             "MAX_VALUE": column_statistics.maximum_value,
+                         }
+                     )
+
+                 data.append(item)
+
+         df = pd.DataFrame(data, columns=cls.columns)
+         return df
+
+
+ class MetaTableConstraintsTable(Table):
+     name = "META_TABLE_CONSTRAINTS"
+     columns = [
+         "CONSTRAINT_CATALOG",
+         "CONSTRAINT_SCHEMA",
+         "CONSTRAINT_NAME",
+         "TABLE_SCHEMA",
+         "TABLE_NAME",
+         "CONSTRAINT_TYPE",
+         "ENFORCED",
+     ]
+
+     @classmethod
+     def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+         databases, tables = _get_scope(query)
+
+         records = _get_records_from_data_catalog(databases, tables)
+
+         data = []
+         for record in records:
+             database_name = record.integration.name
+             table_name = record.name
+             primary_keys = record.meta_primary_keys
+             foreign_keys_children = record.meta_foreign_keys_children
+             foreign_keys_parents = record.meta_foreign_keys_parents
+
+             for pk in primary_keys:
+                 item = {
+                     "CONSTRAINT_CATALOG": "def",
+                     "CONSTRAINT_SCHEMA": database_name,
+                     "CONSTRAINT_NAME": pk.constraint_name,
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "CONSTRAINT_TYPE": "PRIMARY KEY",
+                 }
+                 data.append(item)
+
+             for fk in foreign_keys_children:
+                 item = {
+                     "CONSTRAINT_CATALOG": "def",
+                     "CONSTRAINT_SCHEMA": database_name,
+                     "CONSTRAINT_NAME": fk.constraint_name,
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "CONSTRAINT_TYPE": "FOREIGN KEY",
+                 }
+                 data.append(item)
+
+             for fk in foreign_keys_parents:
+                 item = {
+                     "CONSTRAINT_CATALOG": "def",
+                     "CONSTRAINT_SCHEMA": database_name,
+                     "CONSTRAINT_NAME": fk.constraint_name,
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "CONSTRAINT_TYPE": "FOREIGN KEY",
+                 }
+                 data.append(item)
+
+         df = pd.DataFrame(data, columns=cls.columns)
+         return df
+
+
+ class MetaColumnUsageTable(Table):
+     name = "META_KEY_COLUMN_USAGE"
+     columns = [
+         "CONSTRAINT_CATALOG",
+         "CONSTRAINT_SCHEMA",
+         "CONSTRAINT_NAME",
+         "TABLE_CATALOG",
+         "TABLE_SCHEMA",
+         "TABLE_NAME",
+         "COLUMN_NAME",
+         "ORDINAL_POSITION",
+         "POSITION_IN_UNIQUE_CONSTRAINT",
+         "REFERENCED_TABLE_SCHEMA",
+         "REFERENCED_TABLE_NAME",
+         "REFERENCED_COLUMN_NAME",
+     ]
+
+     @classmethod
+     def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+         databases, tables = _get_scope(query)
+
+         records = _get_records_from_data_catalog(databases, tables)
+
+         data = []
+         for record in records:
+             database_name = record.integration.name
+             table_name = record.name
+             primary_keys = record.meta_primary_keys
+             foreign_keys_children = record.meta_foreign_keys_children
+             foreign_keys_parents = record.meta_foreign_keys_parents
+
+             for pk in primary_keys:
+                 column = pk.meta_columns
+
+                 item = {
+                     "CONSTRAINT_CATALOG": "def",
+                     "CONSTRAINT_SCHEMA": database_name,
+                     "CONSTRAINT_NAME": pk.constraint_name,
+                     "TABLE_CATALOG": "def",
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "COLUMN_NAME": column.name,
+                     "ORDINAL_POSITION": pk.ordinal_position,
+                     "POSITION_IN_UNIQUE_CONSTRAINT": None,
+                     "REFERENCED_TABLE_SCHEMA": None,
+                     "REFERENCED_TABLE_NAME": None,
+                     "REFERENCED_COLUMN_NAME": None,
+                 }
+                 data.append(item)
+
+             for fk in foreign_keys_children:
+                 item = {
+                     "CONSTRAINT_CATALOG": "def",
+                     "CONSTRAINT_SCHEMA": database_name,
+                     "CONSTRAINT_NAME": fk.constraint_name,
+                     "TABLE_CATALOG": "def",
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "COLUMN_NAME": fk.child_column.name,
+                     "ORDINAL_POSITION": None,
+                     "POSITION_IN_UNIQUE_CONSTRAINT": None,
+                     "REFERENCED_TABLE_SCHEMA": fk.parent_table.integration.name if fk.parent_table else None,
+                     "REFERENCED_TABLE_NAME": fk.parent_table.name if fk.parent_table else None,
+                     "REFERENCED_COLUMN_NAME": fk.parent_column.name if fk.parent_column else None,
+                 }
+                 data.append(item)
+
+             for fk in foreign_keys_parents:
+                 item = {
+                     "CONSTRAINT_CATALOG": "def",
+                     "CONSTRAINT_SCHEMA": database_name,
+                     "CONSTRAINT_NAME": fk.constraint_name,
+                     "TABLE_CATALOG": "def",
+                     "TABLE_SCHEMA": database_name,
+                     "TABLE_NAME": table_name,
+                     "COLUMN_NAME": fk.child_column.name,
+                     "ORDINAL_POSITION": None,
+                     "POSITION_IN_UNIQUE_CONSTRAINT": None,
+                     "REFERENCED_TABLE_SCHEMA": fk.child_table.integration.name if fk.child_table else None,
+                     "REFERENCED_TABLE_NAME": fk.child_table.name if fk.child_table else None,
+                     "REFERENCED_COLUMN_NAME": fk.parent_column.name if fk.child_column else None,
+                 }
+                 data.append(item)
+
+         df = pd.DataFrame(data, columns=cls.columns)
+         return df
+
+
+ class MetaHandlerInfoTable(Table):
+     name = "META_HANDLER_INFO"
+     columns = ["HANDLER_INFO", "TABLE_SCHEMA"]
+
+     @classmethod
+     def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+         databases, tables = _get_scope(query)
+
+         data = []
+         for database in databases:
+             data_catalog_reader = DataCatalogReader(database_name=database, table_names=tables)
+             handler_info = data_catalog_reader.get_handler_info()
+             data.append({"HANDLER_INFO": str(handler_info), "TABLE_SCHEMA": database})
+
+         df = pd.DataFrame(data, columns=cls.columns)
+         return df
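Usage sketch for the new catalog tables above: they are registered under information_schema (see the InformationSchemaDataNode changes earlier in this diff), and _get_records_from_data_catalog requires at least one database in the query scope. A hedged example via the Python SDK; the connection URL, the database name my_postgres, and the assumption that the scope filter comes from a TABLE_SCHEMA condition are all illustrative:

```python
# Hedged sketch: assumes a local MindsDB instance whose data catalog has been
# populated for a connected database named 'my_postgres'.
import mindsdb_sdk

server = mindsdb_sdk.connect("http://127.0.0.1:47334")

# Without a database filter, _get_records_from_data_catalog raises
# "At least one database must be specified in the query."
df = server.query(
    "SELECT TABLE_NAME, TABLE_DESCRIPTION, ROW_COUNT "
    "FROM information_schema.META_TABLES "
    "WHERE TABLE_SCHEMA = 'my_postgres'"
).fetch()
print(df)
```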
mindsdb/api/executor/planner/plan_join.py CHANGED
@@ -158,7 +158,7 @@ class PlanJoinTablesQuery:
  integration = self.planner.default_namespace

  if integration is None and not hasattr(table, "sub_select"):
-     raise PlanningException(f"Integration not found for: {table}")
+     raise PlanningException(f"Database not found for: {table}")

  sub_select = getattr(table, "sub_select", None)

mindsdb/api/executor/planner/query_planner.py CHANGED
@@ -255,12 +255,18 @@ class QueryPlanner:

  database = self.default_namespace

+ err_msg_suffix = ""
  if len(parts) > 1:
      if parts[0].lower() in self.databases:
          database = parts.pop(0).lower()
+     else:
+         err_msg_suffix = f"'{parts[0].lower()}' is not valid database name."

  if database is None:
-     raise PlanningException(f"Integration not found for: {node}")
+     raise PlanningException(
+         f"Invalid or missing database name for identifier '{node}'. {err_msg_suffix}\n"
+         "Query must include a valid database name prefix in format: 'database_name.table_name' or 'database_name.schema_name.table_name'"
+     )

  return database, Identifier(parts=parts, alias=alias)
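The planner change above turns a terse "Integration not found" into an actionable message. A standalone mirror of the new resolution rule (the function name and the known-database set are illustrative, not the planner's API):

```python
# Illustrative re-implementation of the resolution rule shown in the hunk;
# QueryPlanner's real method operates on parsed Identifier nodes.
from typing import Optional

known_databases = {"mindsdb", "my_postgres"}  # assumed set of connected databases

def resolve_database(identifier: str, default_namespace: Optional[str] = None):
    """A multipart identifier must start with a known database name
    unless a default namespace is set."""
    parts = identifier.split(".")
    database = default_namespace
    err_msg_suffix = ""
    if len(parts) > 1:
        if parts[0].lower() in known_databases:
            database = parts.pop(0).lower()
        else:
            err_msg_suffix = f"'{parts[0].lower()}' is not valid database name."
    if database is None:
        raise ValueError(
            f"Invalid or missing database name for identifier '{identifier}'. {err_msg_suffix}\n"
            "Query must include a valid database name prefix in format: "
            "'database_name.table_name' or 'database_name.schema_name.table_name'"
        )
    return database, parts

print(resolve_database("my_postgres.orders"))  # ('my_postgres', ['orders'])
# resolve_database("typo_db.orders") raises with the new, more descriptive message.
```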