MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +50 -26
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/a2a/task_manager.py +68 -6
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +29 -33
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +145 -40
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +352 -83
- mindsdb/integrations/libs/api_handler.py +279 -57
- mindsdb/integrations/libs/base.py +185 -30
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +226 -196
- mindsdb/interfaces/agents/constants.py +8 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +375 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +38 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +222 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +66 -25
- mindsdb/interfaces/knowledge_base/evaluate.py +516 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +31 -36
- mindsdb/interfaces/skills/sql_agent.py +113 -86
- mindsdb/interfaces/storage/db.py +242 -82
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +13 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +40 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +257 -257
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +76 -68
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/data_catalog/data_catalog_loader.py
@@ -0,0 +1,375 @@
+from typing import List, Union
+
+import pandas as pd
+
+from mindsdb.integrations.libs.response import RESPONSE_TYPE
+from mindsdb.interfaces.data_catalog.base_data_catalog import BaseDataCatalog
+from mindsdb.interfaces.storage import db
+
+
+class DataCatalogLoader(BaseDataCatalog):
+    """
+    This class is responsible for loading the metadata from a data source (via the handler) and storing it in the data catalog.
+    """
+
+    def load_metadata(self) -> None:
+        """
+        Load the metadata from the handler and store it in the database.
+        """
+        if not self.is_data_catalog_supported():
+            return
+
+        loaded_table_names = self._get_loaded_table_names()
+
+        tables = self._load_table_metadata(loaded_table_names)
+
+        if tables:
+            columns = self._load_column_metadata(tables)
+
+            self._load_column_statistics(tables, columns)
+
+            self._load_primary_keys(tables, columns)
+
+            self._load_foreign_keys(tables, columns)
+
+        self.logger.info(f"Metadata loading completed for {self.database_name}.")
+
+    def _get_loaded_table_names(self) -> List[str]:
+        """
+        Retrieve the names of tables that are already present in the data catalog for the current integration.
+        If table_names are provided, only those tables will be checked.
+
+        Returns:
+            List[str]: Names of tables already loaded in the data catalog.
+        """
+        query = db.session.query(db.MetaTables).filter_by(integration_id=self.integration_id)
+        if self.table_names:
+            query = query.filter(db.MetaTables.name.in_(self.table_names))
+
+        tables = query.all()
+        table_names = [table.name for table in tables]
+
+        if table_names:
+            self.logger.info(f"Tables already loaded in the data catalog: {', '.join(table_names)}.")
+
+        return table_names
+
+    def _load_table_metadata(self, loaded_table_names: List[str] = None) -> List[Union[db.MetaTables, None]]:
+        """
+        Load the table metadata from the handler.
+        """
+        self.logger.info(f"Loading tables for {self.database_name}")
+        response = self.data_handler.meta_get_tables(self.table_names)
+        if response.resp_type == RESPONSE_TYPE.ERROR:
+            self.logger.error(f"Failed to load tables for {self.database_name}: {response.error_message}")
+            return []
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No tables found for {self.database_name}.")
+            return []
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No tables to add for {self.database_name}.")
+            return []
+
+        df.columns = df.columns.str.lower()
+
+        # Filter out tables that are already loaded in the data catalog
+        if loaded_table_names:
+            df = df[~df["table_name"].isin(loaded_table_names)]
+
+        if df.empty:
+            self.logger.info(f"No new tables to load for {self.database_name}.")
+            return []
+
+        tables = self._add_table_metadata(df)
+        self.logger.info(f"Tables loaded for {self.database_name}.")
+        return tables
+
+    def _add_table_metadata(self, df: pd.DataFrame) -> List[db.MetaTables]:
+        """
+        Add the table metadata to the database.
+        """
+        tables = []
+        try:
+            for row in df.to_dict(orient="records"):
+                # Convert the row_count to an integer if it is not NaN, otherwise set it to None.
+                val = row.get("row_count")
+                row_count = int(val) if pd.notna(val) else None
+
+                record = db.MetaTables(
+                    integration_id=self.integration_id,
+                    name=row.get("table_name") or row.get("name"),
+                    schema=row.get("table_schema"),
+                    description=row.get("table_description"),
+                    type=row.get("table_type"),
+                    row_count=row_count,
+                )
+                tables.append(record)
+
+            db.session.add_all(tables)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add tables: {e}")
+            db.session.rollback()
+            raise
+        return tables
+
+    def _load_column_metadata(self, tables: db.MetaTables) -> List[db.MetaColumns]:
+        """
+        Load the column metadata from the handler.
+        """
+        self.logger.info(f"Loading columns for {self.database_name}")
+        response = self.data_handler.meta_get_columns(self.table_names)
+        if response.resp_type == RESPONSE_TYPE.ERROR:
+            self.logger.error(f"Failed to load columns for {self.database_name}: {response.error_message}")
+            return []
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No columns found for {self.database_name}.")
+            return []
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No columns to load for {self.database_name}.")
+            return []
+
+        df.columns = df.columns.str.lower()
+        columns = self._add_column_metadata(df, tables)
+        self.logger.info(f"Columns loaded for {self.database_name}.")
+        return columns
+
+    def _add_column_metadata(self, df: pd.DataFrame, tables: db.MetaTables) -> List[db.MetaColumns]:
+        """
+        Add the column metadata to the database.
+        """
+        columns = []
+        try:
+            for row in df.to_dict(orient="records"):
+                record = db.MetaColumns(
+                    table_id=next((table.id for table in tables if table.name == row.get("table_name"))),
+                    name=row.get("column_name"),
+                    data_type=row.get("data_type"),
+                    default_value=row.get("column_default"),
+                    description=row.get("description"),
+                    is_nullable=row.get("is_nullable"),
+                )
+                columns.append(record)
+
+            db.session.add_all(columns)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add columns: {e}")
+            db.session.rollback()
+            raise
+        return columns
+
+    def _load_column_statistics(self, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Load the column statistics metadata from the handler.
+        """
+        self.logger.info(f"Loading column statistics for {self.database_name}")
+        response = self.data_handler.meta_get_column_statistics(self.table_names)
+        if response.resp_type == RESPONSE_TYPE.ERROR:
+            self.logger.error(f"Failed to load column statistics for {self.database_name}: {response.error_message}")
+            return
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No column statistics found for {self.database_name}.")
+            return
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No column statistics to load for {self.database_name}.")
+            return
+
+        df.columns = df.columns.str.lower()
+        self._add_column_statistics(df, tables, columns)
+        self.logger.info(f"Column statistics loaded for {self.database_name}.")
+
+    def _add_column_statistics(self, df: pd.DataFrame, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Add the column statistics metadata to the database.
+        """
+        column_statistics = []
+        try:
+            for row in df.to_dict(orient="records"):
+                table_id = next((table.id for table in tables if table.name == row.get("table_name")))
+                column_id = next(
+                    (
+                        column.id
+                        for column in columns
+                        if column.name == row.get("column_name") and column.table_id == table_id
+                    )
+                )
+
+                # Convert the distinct_values_count to an integer if it is not NaN, otherwise set it to None.
+                val = row.get("distinct_values_count")
+                distinct_values_count = int(val) if pd.notna(val) else None
+
+                # Convert the most_common_frequencies to a list of strings.
+                most_common_frequencies = [str(val) for val in row.get("most_common_frequencies") or []]
+
+                record = db.MetaColumnStatistics(
+                    column_id=column_id,
+                    most_common_values=row.get("most_common_values"),
+                    most_common_frequencies=most_common_frequencies,
+                    null_percentage=row.get("null_percentage"),
+                    distinct_values_count=distinct_values_count,
+                    minimum_value=row.get("minimum_value"),
+                    maximum_value=row.get("maximum_value"),
+                )
+                column_statistics.append(record)
+
+            db.session.add_all(column_statistics)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add column statistics: {e}")
+            db.session.rollback()
+            raise
+
+    def _load_primary_keys(self, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Load the primary keys metadata from the handler.
+        """
+        self.logger.info(f"Loading primary keys for {self.database_name}")
+        response = self.data_handler.meta_get_primary_keys(self.table_names)
+        if response.resp_type == RESPONSE_TYPE.ERROR:
+            self.logger.error(f"Failed to load primary keys for {self.database_name}: {response.error_message}")
+            return
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No primary keys found for {self.database_name}.")
+            return
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No primary keys to load for {self.database_name}.")
+            return
+
+        df.columns = df.columns.str.lower()
+        self._add_primary_keys(df, tables, columns)
+        self.logger.info(f"Primary keys loaded for {self.database_name}.")
+
+    def _add_primary_keys(self, df: pd.DataFrame, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Add the primary keys metadata to the database.
+        """
+        primary_keys = []
+        try:
+            for row in df.to_dict(orient="records"):
+                table_id = next((table.id for table in tables if table.name == row.get("table_name")))
+                column_id = next(
+                    (
+                        column.id
+                        for column in columns
+                        if column.name == row.get("column_name") and column.table_id == table_id
+                    )
+                )
+
+                record = db.MetaPrimaryKeys(
+                    table_id=table_id,
+                    column_id=column_id,
+                    constraint_name=row.get("constraint_name"),
+                )
+                primary_keys.append(record)
+
+            db.session.add_all(primary_keys)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add primary keys: {e}")
+            db.session.rollback()
+            raise
+
+    def _load_foreign_keys(self, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Load the foreign keys metadata from the handler.
+        """
+        self.logger.info(f"Loading foreign keys for {self.database_name}")
+        response = self.data_handler.meta_get_foreign_keys(self.table_names)
+        if response.resp_type == RESPONSE_TYPE.ERROR:
+            self.logger.error(f"Failed to load foreign keys for {self.database_name}: {response.error_message}")
+            return
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No foreign keys found for {self.database_name}.")
+            return
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No foreign keys to load for {self.database_name}.")
+            return
+
+        df.columns = df.columns.str.lower()
+        self._add_foreign_keys(df, tables, columns)
+        self.logger.info(f"Foreign keys loaded for {self.database_name}.")
+
+    def _add_foreign_keys(self, df: pd.DataFrame, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Add the foreign keys metadata to the database.
+        """
+        foreign_keys = []
+        try:
+            for row in df.to_dict(orient="records"):
+                try:
+                    parent_table_id = next((table.id for table in tables if table.name == row.get("parent_table_name")))
+                    parent_column_id = next(
+                        (
+                            column.id
+                            for column in columns
+                            if column.name == row.get("parent_column_name") and column.table_id == parent_table_id
+                        )
+                    )
+                    child_table_id = next((table.id for table in tables if table.name == row.get("child_table_name")))
+                    child_column_id = next(
+                        (
+                            column.id
+                            for column in columns
+                            if column.name == row.get("child_column_name") and column.table_id == child_table_id
+                        )
+                    )
+                except StopIteration:
+                    self.logger.warning(
+                        f"The foreign key relationship for {row.get('parent_table_name')} -> {row.get('child_table_name')} "
+                        f"could not be established. One or more tables or columns may not exist in the metadata."
+                    )
+                    continue
+
+                record = db.MetaForeignKeys(
+                    parent_table_id=parent_table_id,
+                    parent_column_id=parent_column_id,
+                    child_table_id=child_table_id,
+                    child_column_id=child_column_id,
+                    constraint_name=row.get("constraint_name"),
+                )
+                foreign_keys.append(record)
+
+            db.session.add_all(foreign_keys)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add foreign keys: {e}")
+            db.session.rollback()
+            raise
+
+    def unload_metadata(self) -> None:
+        """
+        Remove the metadata for the specified database from the data catalog.
+        """
+        if not self.is_data_catalog_supported():
+            return
+
+        meta_tables = db.session.query(db.MetaTables).filter_by(integration_id=self.integration_id).all()
+
+        if not meta_tables:
+            self.logger.info(f"No metadata found for {self.database_name}. Nothing to remove.")
+            return
+
+        for table in meta_tables:
+            db.session.query(db.MetaPrimaryKeys).filter_by(table_id=table.id).delete()
+            db.session.query(db.MetaForeignKeys).filter(
+                (db.MetaForeignKeys.parent_table_id == table.id) | (db.MetaForeignKeys.child_table_id == table.id)
+            ).delete()
+            meta_columns = db.session.query(db.MetaColumns).filter_by(table_id=table.id).all()
+            for col in meta_columns:
+                db.session.query(db.MetaColumnStatistics).filter_by(column_id=col.id).delete()
+                db.session.delete(col)

+            db.session.delete(table)
+        db.session.commit()
+        self.logger.info(f"Metadata for {self.database_name} removed successfully.")
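For orientation, here is a minimal usage sketch. It is not part of the diff: the `BaseDataCatalog` constructor lives in `base_data_catalog.py` (+54 lines, not reproduced in this report), so the constructor arguments below (`session`, `database_name`, `table_names`) are assumptions.

# Illustrative sketch only -- not part of the released code.
# The BaseDataCatalog constructor signature is assumed.
from mindsdb.interfaces.data_catalog.data_catalog_loader import DataCatalogLoader

loader = DataCatalogLoader(session, database_name="my_postgres", table_names=["orders", "customers"])

# Calls the handler's meta_get_* methods and persists tables, columns,
# column statistics, primary keys and foreign keys, skipping tables
# already present in the catalog.
loader.load_metadata()

# Removes catalog rows for the integration: key constraints, per-column
# statistics, columns, then the table records themselves.
loader.unload_metadata()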
mindsdb/interfaces/data_catalog/data_catalog_reader.py
@@ -0,0 +1,38 @@
+from mindsdb.interfaces.data_catalog.base_data_catalog import BaseDataCatalog
+from mindsdb.interfaces.storage import db
+
+
+class DataCatalogReader(BaseDataCatalog):
+    """
+    This class is responsible for reading the metadata from the data catalog and providing it in a structured format.
+    """
+
+    def read_metadata_as_string(self) -> str:
+        """
+        Read the metadata from the data catalog and return it as a string.
+        """
+        if not self.is_data_catalog_supported():
+            return f"Data catalog is not supported for database '{self.database_name}'."
+        tables = self._read_metadata()
+        if not tables:
+            self.logger.warning(f"No metadata found for database '{self.database_name}'")
+            return f"No metadata found for database '{self.database_name}'"
+
+        metadata_str = "Data Catalog: \n"
+        if hasattr(self.data_handler, "meta_get_handler_info"):
+            metadata_str += self.data_handler.meta_get_handler_info() + "\n\n"
+
+        for table in tables:
+            metadata_str += table.as_string() + "\n\n"
+        return metadata_str
+
+    def _read_metadata(self) -> list:
+        """
+        Read the metadata from the data catalog and return it in a structured format.
+        """
+        query = db.session.query(db.MetaTables).filter_by(integration_id=self.integration_id)
+        if self.table_names:
+            cleaned_table_names = [name.strip("`").split(".")[-1] for name in self.table_names]
+            query = query.filter(db.MetaTables.name.in_(cleaned_table_names))
+        tables = query.all()
+        return tables
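A matching consumption sketch, again illustrative and with the same assumed constructor. Note that `_read_metadata` strips backticks and keeps only the last dot-separated segment of each requested name, so fully qualified table identifiers are accepted:

# Illustrative sketch only -- not part of the released code.
from mindsdb.interfaces.data_catalog.data_catalog_reader import DataCatalogReader

reader = DataCatalogReader(session, database_name="my_postgres", table_names=["`my_postgres.public.orders`"])

# Produces "Data Catalog: ..." followed by one block per table
# (MetaTables.as_string()), e.g. for inclusion in an agent prompt.
print(reader.read_metadata_as_string())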
mindsdb/interfaces/database/database.py
@@ -11,6 +11,7 @@ from mindsdb.interfaces.database.log import LogDBController
 class DatabaseController:
     def __init__(self):
         from mindsdb.interfaces.database.integrations import integration_controller
+
         self.integration_controller = integration_controller
         self.project_controller = ProjectController()

@@ -21,13 +22,13 @@ class DatabaseController:
         databases = self.get_dict()
         name = name.lower()
         if name not in databases:
-            raise EntityNotExistsError('Database does not exists', name)
-        db_type = databases[name]['type']
-        if db_type == 'project':
+            raise EntityNotExistsError("Database does not exists", name)
+        db_type = databases[name]["type"]
+        if db_type == "project":
             project = self.get_project(name)
             project.delete()
             return
-        elif db_type == 'data':
+        elif db_type == "data":
             self.integration_controller.delete(name)
             return
         else:
@@ -37,59 +38,52 @@ class DatabaseController:
     def get_list(self, filter_type: Optional[str] = None, with_secrets: Optional[bool] = True):
         projects = self.project_controller.get_list()
         integrations = self.integration_controller.get_all(show_secrets=with_secrets)
-        result = [{
-            'name': 'information_schema',
-            'type': 'system',
-            'id': None,
-            'engine': None,
-            'visible': True,
-            'deletable': False
-        }, {
-            'name': 'log',
-            'type': 'system',
-            'id': None,
-            'engine': None,
-            'visible': True,
-            'deletable': False
-        }]
+        result = [
+            {
+                "name": "information_schema",
+                "type": "system",
+                "id": None,
+                "engine": None,
+                "visible": True,
+                "deletable": False,
+            },
+            {"name": "log", "type": "system", "id": None, "engine": None, "visible": True, "deletable": False},
+        ]
         for x in projects:
-            result.append({
-                'name': x.name,
-                'type': 'project',
-                'id': x.id,
-                'engine': None,
-                'visible': True,
-                'deletable': x.name.lower() != config.get('default_project')
-            })
+            result.append(
+                {
+                    "name": x.name,
+                    "type": "project",
+                    "id": x.id,
+                    "engine": None,
+                    "visible": True,
+                    "deletable": x.name.lower() != config.get("default_project"),
+                }
+            )
         for key, value in integrations.items():
-            db_type = value.get('type', 'data')
-            if db_type != 'ml':
-                result.append({
-                    'name': key,
-                    'type': value.get('type', 'data'),
-                    'id': value.get('id'),
-                    'engine': value.get('engine'),
-                    'class_type': value.get('class_type'),
-                    'connection_data': value.get('connection_data'),
-                    'visible': True,
-                    'deletable': value.get('permanent', False) is False
-                })
+            db_type = value.get("type", "data")
+            if db_type != "ml":
+                result.append(
+                    {
+                        "name": key,
+                        "type": value.get("type", "data"),
+                        "id": value.get("id"),
+                        "engine": value.get("engine"),
+                        "class_type": value.get("class_type"),
+                        "connection_data": value.get("connection_data"),
+                        "visible": True,
+                        "deletable": value.get("permanent", False) is False,
+                    }
+                )

         if filter_type is not None:
-            result = [x for x in result if x['type'] == filter_type]
+            result = [x for x in result if x["type"] == filter_type]

         return result

     def get_dict(self, filter_type: Optional[str] = None):
         return OrderedDict(
-            (
-                x['name'].lower(),
-                {
-                    'type': x['type'],
-                    'engine': x['engine'],
-                    'id': x['id']
-                }
-            )
+            (x["name"].lower(), {"type": x["type"], "engine": x["engine"], "id": x["id"]})
             for x in self.get_list(filter_type=filter_type)
         )
@@ -98,13 +92,8 @@ class DatabaseController:

         # TODO get directly from db?
         for rec in self.get_list():
-            if rec['id'] == integration_id and rec['type'] == 'data':
-                return {
-                    'name': rec['name'],
-                    'type': rec['type'],
-                    'engine': rec['engine'],
-                    'id': rec['id']
-                }
+            if rec["id"] == integration_id and rec["type"] == "data":
+                return {"name": rec["name"], "type": rec["type"], "engine": rec["engine"], "id": rec["id"]}

     def exists(self, db_name: str) -> bool:
         return db_name.lower() in self.get_dict()
@@ -113,11 +102,46 @@ class DatabaseController:
         return self.project_controller.get(name=name)

     def get_system_db(self, name: str):
-        if name == 'log':
+        if name == "log":
             return self.logs_db_controller
-        elif name == 'information_schema':
+        elif name == "information_schema":
             from mindsdb.api.executor.controllers.session_controller import SessionController
+
             session = SessionController()
             return session.datahub
         else:
             raise Exception(f"Database '{name}' does not exists")
+
+    def update(self, name: str, data: dict):
+        """
+        Updates the database with the given name using the provided data.
+
+        Parameters:
+            name (str): The name of the database to update.
+            data (dict): The data to update the database with.
+
+        Raises:
+            EntityNotExistsError: If the database does not exist.
+        """
+        databases = self.get_dict()
+        name = name.lower()
+        if name not in databases:
+            raise EntityNotExistsError("Database does not exist.", name)
+
+        db_type = databases[name]["type"]
+        if db_type == "project":
+            # Only the name of the project can be updated.
+            if {"name"} != set(data):
+                raise ValueError("Only the 'name' field can be updated for projects.")
+            self.project_controller.update(name=name, new_name=str(data["name"]))
+            return
+
+        elif db_type == "data":
+            # Only the parameters (connection data) of the integration can be updated.
+            if {"parameters"} != set(data):
+                raise ValueError("Only the 'parameters' field can be updated for integrations.")
+            self.integration_controller.modify(name, data["parameters"])
+            return
+
+        else:
+            raise ValueError(f"Database with type '{db_type}' cannot be updated")
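The new `update` method accepts exactly one field per database type, so callers would look roughly like this (an illustrative sketch based only on the diff above; the names `my_project` and `my_postgres` are placeholders):

# Illustrative sketch only -- not part of the diff.
from mindsdb.interfaces.database.database import DatabaseController

controller = DatabaseController()

# Projects: only 'name' may be updated.
controller.update("my_project", {"name": "my_project_v2"})

# Data integrations: only 'parameters' (connection data) may be updated.
controller.update("my_postgres", {"parameters": {"host": "db.internal", "port": 5432}})

# Any other payload raises ValueError; an unknown database name raises
# EntityNotExistsError.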