MindsDB 25.6.3.1-py3-none-any.whl → 25.6.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +71 -43
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +16 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
- mindsdb/api/executor/planner/plan_join.py +1 -1
- mindsdb/api/executor/planner/query_planner.py +7 -1
- mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
- mindsdb/integrations/libs/api_handler.py +6 -7
- mindsdb/interfaces/agents/constants.py +44 -0
- mindsdb/interfaces/agents/langchain_agent.py +8 -1
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +19 -2
- mindsdb/interfaces/knowledge_base/controller.py +6 -13
- mindsdb/interfaces/knowledge_base/evaluate.py +3 -3
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +24 -22
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
- mindsdb/interfaces/skills/skill_tool.py +91 -88
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.6.4.0.dist-info}/METADATA +259 -257
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.6.4.0.dist-info}/RECORD +23 -23
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.6.4.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.6.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.6.4.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py
CHANGED
@@ -1,6 +1,6 @@
 __title__ = "MindsDB"
 __package_name__ = "mindsdb"
-__version__ = "25.6.3.1"
+__version__ = "25.6.4.0"
 __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
 __email__ = "jorge@mindsdb.com"
 __author__ = "MindsDB Inc"
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py
CHANGED

@@ -15,12 +15,38 @@ from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
 from mindsdb.utilities import log

 from .system_tables import (
-    SchemataTable,
-
-
+    SchemataTable,
+    TablesTable,
+    MetaTablesTable,
+    ColumnsTable,
+    MetaColumnsTable,
+    EventsTable,
+    RoutinesTable,
+    PluginsTable,
+    EnginesTable,
+    MetaTableConstraintsTable,
+    KeyColumnUsageTable,
+    MetaColumnUsageTable,
+    StatisticsTable,
+    MetaColumnStatisticsTable,
+    CharacterSetsTable,
+    CollationsTable,
+    MetaHandlerInfoTable,
+)
 from .mindsdb_tables import (
-    ModelsTable,
-
+    ModelsTable,
+    DatabasesTable,
+    MLEnginesTable,
+    HandlersTable,
+    JobsTable,
+    QueriesTable,
+    ChatbotsTable,
+    KBTable,
+    SkillsTable,
+    AgentsTable,
+    ViewsTable,
+    TriggersTable,
+)

 from mindsdb.api.executor.datahub.classes.tables_row import TablesRow

@@ -32,12 +58,35 @@ class InformationSchemaDataNode(DataNode):
     type = "INFORMATION_SCHEMA"

     tables_list = [
-        SchemataTable,
-
-
-
-
-
+        SchemataTable,
+        TablesTable,
+        MetaTablesTable,
+        ColumnsTable,
+        MetaColumnsTable,
+        EventsTable,
+        RoutinesTable,
+        PluginsTable,
+        EnginesTable,
+        MetaTableConstraintsTable,
+        KeyColumnUsageTable,
+        MetaColumnUsageTable,
+        StatisticsTable,
+        MetaColumnStatisticsTable,
+        CharacterSetsTable,
+        CollationsTable,
+        ModelsTable,
+        DatabasesTable,
+        MLEnginesTable,
+        HandlersTable,
+        JobsTable,
+        ChatbotsTable,
+        KBTable,
+        SkillsTable,
+        AgentsTable,
+        ViewsTable,
+        TriggersTable,
+        QueriesTable,
+        MetaHandlerInfoTable,
     ]

     def __init__(self, session):

@@ -46,9 +95,7 @@ class InformationSchemaDataNode(DataNode):
         self.project_controller = ProjectController()
         self.database_controller = session.database_controller

-        self.persis_datanodes = {
-            'log': self.database_controller.logs_db_controller
-        }
+        self.persis_datanodes = {"log": self.database_controller.logs_db_controller}

         databases = self.database_controller.get_dict()
         if "files" in databases:

@@ -69,15 +116,13 @@ class InformationSchemaDataNode(DataNode):
         if name_lower == "information_schema":
             return self

-        if name_lower == 'log':
-            return self.database_controller.get_system_db('log')
+        if name_lower == "log":
+            return self.database_controller.get_system_db("log")

         if name_lower in self.persis_datanodes:
             return self.persis_datanodes[name_lower]

-        existing_databases_meta = (
-            self.database_controller.get_dict()
-        )  # filter_type='project'
+        existing_databases_meta = self.database_controller.get_dict()  # filter_type='project'
         database_name = None
         for key in existing_databases_meta:
             if key.lower() == name_lower:

@@ -130,9 +175,7 @@ class InformationSchemaDataNode(DataNode):
         """
         table_name = table_name.upper()
         if table_name not in self.tables:
-            raise exc.TableNotExistError(
-                f"Table information_schema.{table_name} does not exists"
-            )
+            raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists")
         table_columns_names = self.tables[table_name].columns
         df = pd.DataFrame([[table_columns_names]], columns=[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME])
         for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):

@@ -153,9 +196,7 @@ class InformationSchemaDataNode(DataNode):
         """
         table_name = table_name.upper()
         if table_name not in self.tables:
-            raise exc.TableNotExistError(
-                f"Table information_schema.{table_name} does not exists"
-            )
+            raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists")
         return self.tables[table_name].columns

     def get_integrations_names(self):

@@ -168,25 +209,16 @@ class InformationSchemaDataNode(DataNode):
         return [x.lower() for x in projects]

     def get_tables(self):
-        return [
-            TablesRow(TABLE_NAME=name)
-            for name in self.tables.keys()
-        ]
+        return [TablesRow(TABLE_NAME=name) for name in self.tables.keys()]

     def get_tree_tables(self):
-        return {
-            name: table
-            for name, table in self.tables.items()
-            if table.visible
-        }
+        return {name: table for name, table in self.tables.items() if table.visible}

     def query(self, query: ASTNode, session=None) -> DataHubResponse:
         query_tables = [x[1] for x in get_query_tables(query)]

         if len(query_tables) != 1:
-            raise exc.BadTableError(
-                f"Only one table can be used in query to information_schema: {query}"
-            )
+            raise exc.BadTableError(f"Only one table can be used in query to information_schema: {query}")

         table_name = query_tables[0].upper()

@@ -195,7 +227,7 @@ class InformationSchemaDataNode(DataNode):

         tbl = self.tables[table_name]

-        if hasattr(tbl, 'get_data'):
+        if hasattr(tbl, "get_data"):
             dataframe = tbl.get_data(query=query, inf_schema=self, session=self.session)
         else:
             dataframe = self._get_empty_table(tbl)

@@ -203,11 +235,7 @@ class InformationSchemaDataNode(DataNode):

         columns_info = [{"name": k, "type": v} for k, v in data.dtypes.items()]

-        return DataHubResponse(
-            data_frame=data,
-            columns=columns_info,
-            affected_rows=0
-        )
+        return DataHubResponse(data_frame=data, columns=columns_info, affected_rows=0)

     def _get_empty_table(self, table):
         columns = table.columns
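Net effect of this file's changes: every table class imported above is now registered in tables_list, so the new data-catalog views should be reachable through the standard schema, for example a query along the lines of SELECT * FROM information_schema.META_TABLES WHERE TABLE_SCHEMA = 'example_db' (where 'example_db' is a hypothetical stand-in for a connected data source).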
mindsdb/api/executor/datahub/datanodes/integration_datanode.py
CHANGED

@@ -1,5 +1,6 @@
 import time
 import inspect
+from textwrap import dedent
 from dataclasses import astuple
 from typing import Iterable, List

@@ -240,7 +241,21 @@ class IntegrationDataNode(DataNode):
             raise DBHandlerException(msg) from e

         if result.type == RESPONSE_TYPE.ERROR:
-
+            failed_sql_query = native_query
+            if query is not None:
+                failed_sql_query = query.to_string()
+            raise Exception(
+                dedent(f"""\
+                    Failed to execute external database query during query processing.
+
+                    Database Details:
+                    - Name: {self.integration_handler.name}
+                    - Type: {self.integration_handler.__class__.name}
+
+                    Error: {result.error_message}
+                    Failed Query: {failed_sql_query}
+                """)
+            )
         if result.type == RESPONSE_TYPE.OK:
             return DataHubResponse(affected_rows=result.affected_rows)
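The new error path relies on textwrap.dedent with an escaped first newline to produce a flush-left, multi-line message. A minimal standalone sketch of that pattern, with hypothetical stand-in values:

    from textwrap import dedent

    # hypothetical stand-ins for the handler name, error text, and failed SQL
    name, error_message, failed_sql_query = "example_db", "relation does not exist", "SELECT 1"
    message = dedent(f"""\
        Failed to execute external database query during query processing.

        Database Details:
        - Name: {name}

        Error: {error_message}
        Failed Query: {failed_sql_query}
    """)
    # the backslash after the opening quotes drops the leading newline,
    # and dedent() strips the common indentation from every line
    print(message)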
mindsdb/api/executor/datahub/datanodes/project_datanode.py
CHANGED

@@ -154,7 +154,7 @@ class ProjectDataNode(DataNode):

             return DataHubResponse(data_frame=df, columns=columns_info)

-            raise EntityNotExistsError(f"
+            raise EntityNotExistsError(f"Table '{query_table}' not found in database", self.project.name)
         else:
             raise NotImplementedError(f"Query not supported {query}")
mindsdb/api/executor/datahub/datanodes/system_tables.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Optional, Literal
+from typing import Optional, List, Literal
 from dataclasses import dataclass, fields

 import pandas as pd

@@ -8,6 +8,7 @@ from mindsdb.utilities import log
 from mindsdb.utilities.config import config
 from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
 from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
+from mindsdb.interfaces.data_catalog.data_catalog_reader import DataCatalogReader
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE, MYSQL_DATA_TYPE_COLUMNS_DEFAULT
 from mindsdb.api.executor.datahub.classes.tables_row import TABLES_ROW_TYPE, TablesRow

@@ -503,3 +504,315 @@ class CollationsTable(Table):

         df = pd.DataFrame(data, columns=cls.columns)
         return df
+
+
+# Data Catalog tables
+# TODO: Should these be placed in a separate schema?
+
+
+def _get_records_from_data_catalog(databases: List, tables: Optional[List[str]] = None) -> List:
+    """Get records from the data catalog based on the specified databases and tables."""
+    # TODO: Should we allow to query all databases?
+    if not databases:
+        raise ValueError("At least one database must be specified in the query.")
+
+    records = []
+    for database in databases:
+        data_catalog_reader = DataCatalogReader(database_name=database, table_names=tables)
+        records.extend(data_catalog_reader.read_metadata_as_records())
+
+    return records
+
+
+# TODO: Combine with existing 'TablesTable'?
+class MetaTablesTable(Table):
+    name = "META_TABLES"
+
+    columns = ["TABLE_CATALOG", "TABLE_SCHEMA", "TABLE_NAME", "TABLE_TYPE", "TABLE_DESCRIPTION", "ROW_COUNT"]
+
+    @classmethod
+    def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+        databases, _ = _get_scope(query)
+
+        records = _get_records_from_data_catalog(databases)
+
+        data = []
+        for record in records:
+            item = {
+                "TABLE_CATALOG": "def",
+                "TABLE_SCHEMA": record.integration.name,
+                "TABLE_NAME": record.name,
+                "TABLE_TYPE": record.type,
+                "TABLE_DESCRIPTION": record.description or "",
+                "ROW_COUNT": record.row_count,
+            }
+            data.append(item)
+
+        df = pd.DataFrame(data, columns=cls.columns)
+        return df
+
+
+# TODO: Combine with existing 'ColumnsTable'?
+class MetaColumnsTable(Table):
+    name = "META_COLUMNS"
+
+    columns = [
+        "TABLE_CATALOG",
+        "TABLE_SCHEMA",
+        "TABLE_NAME",
+        "COLUMN_NAME",
+        "DATA_TYPE",
+        "COLUMN_DESCRIPTION",
+        "COLUMN_DEFAULT",
+        "IS_NULLABLE",
+    ]
+
+    @classmethod
+    def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+        databases, tables = _get_scope(query)
+
+        records = _get_records_from_data_catalog(databases, tables)
+
+        data = []
+        for record in records:
+            database_name = record.integration.name
+            table_name = record.name
+            columns = record.meta_columns
+
+            for column in columns:
+                item = {
+                    "TABLE_CATALOG": "def",
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "COLUMN_NAME": column.name,
+                    "DATA_TYPE": column.data_type,
+                    "COLUMN_DESCRIPTION": column.description or "",
+                    "COLUMN_DEFAULT": column.default_value,
+                    "IS_NULLABLE": "YES" if column.is_nullable else "NO",
+                }
+                data.append(item)
+
+        df = pd.DataFrame(data, columns=cls.columns)
+        return df
+
+
+class MetaColumnStatisticsTable(Table):
+    name = "META_COLUMN_STATISTICS"
+    columns = [
+        "TABLE_SCHEMA",
+        "TABLE_NAME",
+        "COLUMN_NAME",
+        "MOST_COMMON_VALS",
+        "MOST_COMMON_FREQS",
+        "NULL_FRAC",
+        "N_DISTINCT",
+        "MIN_VALUE",
+        "MAX_VALUE",
+    ]
+
+    @classmethod
+    def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+        databases, tables = _get_scope(query)
+
+        records = _get_records_from_data_catalog(databases, tables)
+
+        data = []
+        for record in records:
+            database_name = record.integration.name
+            table_name = record.name
+            columns = record.meta_columns
+
+            for column in columns:
+                column_statistics = column.meta_column_statistics[0]
+
+                item = {
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "COLUMN_NAME": column.name,
+                }
+
+                if column_statistics:
+                    item.update(
+                        {
+                            "MOST_COMMON_VALS": column_statistics.most_common_values,
+                            "MOST_COMMON_FREQS": column_statistics.most_common_frequencies,
+                            "NULL_FRAC": column_statistics.null_percentage,
+                            "N_DISTINCT": column_statistics.distinct_values_count,
+                            "MIN_VALUE": column_statistics.minimum_value,
+                            "MAX_VALUE": column_statistics.maximum_value,
+                        }
+                    )
+
+                data.append(item)
+
+        df = pd.DataFrame(data, columns=cls.columns)
+        return df
+
+
+class MetaTableConstraintsTable(Table):
+    name = "META_TABLE_CONSTRAINTS"
+    columns = [
+        "CONSTRAINT_CATALOG",
+        "CONSTRAINT_SCHEMA",
+        "CONSTRAINT_NAME",
+        "TABLE_SCHEMA",
+        "TABLE_NAME",
+        "CONSTRAINT_TYPE",
+        "ENFORCED",
+    ]
+
+    @classmethod
+    def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+        databases, tables = _get_scope(query)
+
+        records = _get_records_from_data_catalog(databases, tables)
+
+        data = []
+        for record in records:
+            database_name = record.integration.name
+            table_name = record.name
+            primary_keys = record.meta_primary_keys
+            foreign_keys_children = record.meta_foreign_keys_children
+            foreign_keys_parents = record.meta_foreign_keys_parents
+
+            for pk in primary_keys:
+                item = {
+                    "CONSTRAINT_CATALOG": "def",
+                    "CONSTRAINT_SCHEMA": database_name,
+                    "CONSTRAINT_NAME": pk.constraint_name,
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "CONSTRAINT_TYPE": "PRIMARY KEY",
+                }
+                data.append(item)
+
+            for fk in foreign_keys_children:
+                item = {
+                    "CONSTRAINT_CATALOG": "def",
+                    "CONSTRAINT_SCHEMA": database_name,
+                    "CONSTRAINT_NAME": fk.constraint_name,
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "CONSTRAINT_TYPE": "FOREIGN KEY",
+                }
+                data.append(item)
+
+            for fk in foreign_keys_parents:
+                item = {
+                    "CONSTRAINT_CATALOG": "def",
+                    "CONSTRAINT_SCHEMA": database_name,
+                    "CONSTRAINT_NAME": fk.constraint_name,
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "CONSTRAINT_TYPE": "FOREIGN KEY",
+                }
+                data.append(item)
+
+        df = pd.DataFrame(data, columns=cls.columns)
+        return df
+
+
+class MetaColumnUsageTable(Table):
+    name = "META_KEY_COLUMN_USAGE"
+    columns = [
+        "CONSTRAINT_CATALOG",
+        "CONSTRAINT_SCHEMA",
+        "CONSTRAINT_NAME",
+        "TABLE_CATALOG",
+        "TABLE_SCHEMA",
+        "TABLE_NAME",
+        "COLUMN_NAME",
+        "ORDINAL_POSITION",
+        "POSITION_IN_UNIQUE_CONSTRAINT",
+        "REFERENCED_TABLE_SCHEMA",
+        "REFERENCED_TABLE_NAME",
+        "REFERENCED_COLUMN_NAME",
+    ]
+
+    @classmethod
+    def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+        databases, tables = _get_scope(query)
+
+        records = _get_records_from_data_catalog(databases, tables)
+
+        data = []
+        for record in records:
+            database_name = record.integration.name
+            table_name = record.name
+            primary_keys = record.meta_primary_keys
+            foreign_keys_children = record.meta_foreign_keys_children
+            foreign_keys_parents = record.meta_foreign_keys_parents
+
+            for pk in primary_keys:
+                column = pk.meta_columns
+
+                item = {
+                    "CONSTRAINT_CATALOG": "def",
+                    "CONSTRAINT_SCHEMA": database_name,
+                    "CONSTRAINT_NAME": pk.constraint_name,
+                    "TABLE_CATALOG": "def",
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "COLUMN_NAME": column.name,
+                    "ORDINAL_POSITION": pk.ordinal_position,
+                    "POSITION_IN_UNIQUE_CONSTRAINT": None,
+                    "REFERENCED_TABLE_SCHEMA": None,
+                    "REFERENCED_TABLE_NAME": None,
+                    "REFERENCED_COLUMN_NAME": None,
+                }
+                data.append(item)
+
+            for fk in foreign_keys_children:
+                item = {
+                    "CONSTRAINT_CATALOG": "def",
+                    "CONSTRAINT_SCHEMA": database_name,
+                    "CONSTRAINT_NAME": fk.constraint_name,
+                    "TABLE_CATALOG": "def",
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "COLUMN_NAME": fk.child_column.name,
+                    "ORDINAL_POSITION": None,
+                    "POSITION_IN_UNIQUE_CONSTRAINT": None,
+                    "REFERENCED_TABLE_SCHEMA": fk.parent_table.integration.name if fk.parent_table else None,
+                    "REFERENCED_TABLE_NAME": fk.parent_table.name if fk.parent_table else None,
+                    "REFERENCED_COLUMN_NAME": fk.parent_column.name if fk.parent_column else None,
+                }
+                data.append(item)
+
+            for fk in foreign_keys_parents:
+                item = {
+                    "CONSTRAINT_CATALOG": "def",
+                    "CONSTRAINT_SCHEMA": database_name,
+                    "CONSTRAINT_NAME": fk.constraint_name,
+                    "TABLE_CATALOG": "def",
+                    "TABLE_SCHEMA": database_name,
+                    "TABLE_NAME": table_name,
+                    "COLUMN_NAME": fk.child_column.name,
+                    "ORDINAL_POSITION": None,
+                    "POSITION_IN_UNIQUE_CONSTRAINT": None,
+                    "REFERENCED_TABLE_SCHEMA": fk.child_table.integration.name if fk.child_table else None,
+                    "REFERENCED_TABLE_NAME": fk.child_table.name if fk.child_table else None,
+                    "REFERENCED_COLUMN_NAME": fk.parent_column.name if fk.child_column else None,
+                }
+                data.append(item)
+
+        df = pd.DataFrame(data, columns=cls.columns)
+        return df
+
+
+class MetaHandlerInfoTable(Table):
+    name = "META_HANDLER_INFO"
+    columns = ["HANDLER_INFO", "TABLE_SCHEMA"]
+
+    @classmethod
+    def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
+        databases, tables = _get_scope(query)
+
+        data = []
+        for database in databases:
+            data_catalog_reader = DataCatalogReader(database_name=database, table_names=tables)
+            handler_info = data_catalog_reader.get_handler_info()
+            data.append({"HANDLER_INFO": str(handler_info), "TABLE_SCHEMA": database})
+
+        df = pd.DataFrame(data, columns=cls.columns)
+        return df
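All of these get_data implementations share one pattern worth noting: rows are collected as plain dicts, and pd.DataFrame(..., columns=...) fixes the column set, so keys absent from an item (as in META_COLUMN_STATISTICS when no statistics were collected) surface as NaN instead of raising. A small self-contained illustration with made-up values:

    import pandas as pd

    columns = ["TABLE_SCHEMA", "TABLE_NAME", "COLUMN_NAME", "NULL_FRAC"]
    data = [
        {"TABLE_SCHEMA": "example_db", "TABLE_NAME": "orders", "COLUMN_NAME": "id", "NULL_FRAC": 0.0},
        {"TABLE_SCHEMA": "example_db", "TABLE_NAME": "orders", "COLUMN_NAME": "note"},  # no statistics collected
    ]
    df = pd.DataFrame(data, columns=columns)
    print(df)  # the second row's NULL_FRAC is NaN rather than a KeyError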
mindsdb/api/executor/planner/plan_join.py
CHANGED

@@ -158,7 +158,7 @@ class PlanJoinTablesQuery:
         integration = self.planner.default_namespace

         if integration is None and not hasattr(table, "sub_select"):
-            raise PlanningException(f"
+            raise PlanningException(f"Database not found for: {table}")

         sub_select = getattr(table, "sub_select", None)
mindsdb/api/executor/planner/query_planner.py
CHANGED

@@ -255,12 +255,18 @@ class QueryPlanner:

         database = self.default_namespace

+        err_msg_suffix = ""
         if len(parts) > 1:
             if parts[0].lower() in self.databases:
                 database = parts.pop(0).lower()
+            else:
+                err_msg_suffix = f"'{parts[0].lower()}' is not valid database name."

         if database is None:
-            raise PlanningException(
+            raise PlanningException(
+                f"Invalid or missing database name for identifier '{node}'. {err_msg_suffix}\n"
+                "Query must include a valid database name prefix in format: 'database_name.table_name' or 'database_name.schema_name.table_name'"
+            )

         return database, Identifier(parts=parts, alias=alias)
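A self-contained sketch of the resolution logic this hunk changes, using a toy database set and a plain ValueError in place of PlanningException; all names are hypothetical:

    def resolve_database(parts, databases, default=None):
        # consume a leading prefix only when it names a known database,
        # otherwise remember why it was rejected for the error message
        database = default
        err_msg_suffix = ""
        if len(parts) > 1:
            if parts[0].lower() in databases:
                database = parts.pop(0).lower()
            else:
                err_msg_suffix = f"'{parts[0].lower()}' is not valid database name."
        if database is None:
            raise ValueError(
                f"Invalid or missing database name for identifier '{'.'.join(parts)}'. {err_msg_suffix}\n"
                "Query must include a valid database name prefix in format: 'database_name.table_name'"
            )
        return database, parts

    try:
        resolve_database(["typo_db", "orders"], {"example_db"})
    except ValueError as e:
        print(e)  # now names the rejected prefix instead of failing opaquely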
mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py
CHANGED

@@ -203,6 +203,8 @@ def create_table_class(resource_name: Text) -> MetaAPIResource:
                 "column_name": field["name"],
                 "data_type": field["type"],
                 "is_nullable": field.get("nillable", False),
+                "default_value": field.get("defaultValue", ""),
+                "description": field.get("inlineHelpText", ""),
             }
         )
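For context, a small sketch of the dict this hunk extends, applied to an abbreviated, hypothetical Salesforce field-describe entry (defaultValue and inlineHelpText are the keys the handler now reads):

    field = {  # abbreviated field-describe payload; values are made up
        "name": "Status__c",
        "type": "picklist",
        "nillable": True,
        "defaultValue": "Open",
        "inlineHelpText": "Current processing state of the record.",
    }
    column_metadata = {
        "column_name": field["name"],
        "data_type": field["type"],
        "is_nullable": field.get("nillable", False),
        "default_value": field.get("defaultValue", ""),  # new in 25.6.4.0
        "description": field.get("inlineHelpText", ""),  # new in 25.6.4.0
    }
    print(column_metadata)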
mindsdb/integrations/libs/api_handler.py
CHANGED

@@ -433,16 +433,15 @@ class APIHandler(BaseHandler):
         Args:
             name (str): the handler name
         """
-
         self._tables = {}

     def _register_table(self, table_name: str, table_class: Any):
         """
         Register the data resource. For e.g if you are using Twitter API it registers the `tweets` resource from `/api/v2/tweets`.
         """
-        if table_name in self._tables:
+        if table_name.lower() in self._tables:
             raise TableAlreadyExists(f"Table with name {table_name} already exists for this handler")
-        self._tables[table_name] = table_class
+        self._tables[table_name.lower()] = table_class

     def _get_table(self, name: Identifier):
         """

@@ -450,10 +449,10 @@ class APIHandler(BaseHandler):
         Args:
             name (Identifier): the table name
         """
-        name = name.parts[-1]
-        if name
-
-
+        name = name.parts[-1].lower()
+        if name in self._tables:
+            return self._tables[name]
+        raise TableNotFound(f"Table not found: {name}")

     def query(self, query: ASTNode):
         if isinstance(query, Select):