MindsDB 25.5.4.1__py3-none-any.whl → 25.6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +28 -25
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/a2a/run_a2a.py +1 -1
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
- mindsdb/integrations/libs/api_handler.py +261 -57
- mindsdb/integrations/libs/base.py +100 -29
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +196 -192
- mindsdb/interfaces/agents/constants.py +7 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +220 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +63 -10
- mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +54 -36
- mindsdb/interfaces/skills/sql_agent.py +109 -86
- mindsdb/interfaces/storage/db.py +223 -79
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +9 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +49 -1
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +268 -268
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +70 -62
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, List
|
|
1
|
+
from typing import Any, List, Optional
|
|
2
2
|
import ast as py_ast
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
@@ -6,26 +6,29 @@ from mindsdb_sql_parser.ast import ASTNode, Select, Insert, Update, Delete, Star
|
|
|
6
6
|
from mindsdb_sql_parser.ast.select.identifier import Identifier
|
|
7
7
|
|
|
8
8
|
from mindsdb.integrations.utilities.sql_utils import (
|
|
9
|
-
extract_comparison_conditions,
|
|
10
|
-
|
|
9
|
+
extract_comparison_conditions,
|
|
10
|
+
filter_dataframe,
|
|
11
|
+
sort_dataframe,
|
|
12
|
+
FilterCondition,
|
|
13
|
+
FilterOperator,
|
|
14
|
+
SortColumn,
|
|
11
15
|
)
|
|
12
16
|
from mindsdb.integrations.libs.base import BaseHandler
|
|
13
17
|
from mindsdb.integrations.libs.api_handler_exceptions import TableAlreadyExists, TableNotFound
|
|
14
18
|
|
|
15
|
-
from mindsdb.integrations.libs.response import
|
|
16
|
-
|
|
17
|
-
RESPONSE_TYPE
|
|
18
|
-
)
|
|
19
|
+
from mindsdb.integrations.libs.response import HandlerResponse as Response, RESPONSE_TYPE
|
|
20
|
+
from mindsdb.utilities import log
|
|
19
21
|
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
logger = log.getLogger("mindsdb")
|
|
22
24
|
|
|
23
|
-
def from_string(self, query_string):
|
|
24
25
|
|
|
25
|
-
|
|
26
|
+
class FuncParser:
|
|
27
|
+
def from_string(self, query_string):
|
|
28
|
+
body = py_ast.parse(query_string.strip(), mode="eval").body
|
|
26
29
|
|
|
27
30
|
if not isinstance(body, py_ast.Call):
|
|
28
|
-
raise RuntimeError(f
|
|
31
|
+
raise RuntimeError(f"Api function not found {query_string}")
|
|
29
32
|
|
|
30
33
|
fnc_name = body.func.id
|
|
31
34
|
|
|
@@ -39,7 +42,6 @@ class FuncParser:
|
|
|
39
42
|
return fnc_name, params
|
|
40
43
|
|
|
41
44
|
def process(self, node):
|
|
42
|
-
|
|
43
45
|
if isinstance(node, py_ast.List):
|
|
44
46
|
elements = []
|
|
45
47
|
for node2 in node.elts:
|
|
@@ -47,7 +49,6 @@ class FuncParser:
|
|
|
47
49
|
return elements
|
|
48
50
|
|
|
49
51
|
if isinstance(node, py_ast.Dict):
|
|
50
|
-
|
|
51
52
|
keys = []
|
|
52
53
|
for node2 in node.keys:
|
|
53
54
|
if isinstance(node2, py_ast.Constant):
|
|
@@ -55,7 +56,7 @@ class FuncParser:
|
|
|
55
56
|
elif isinstance(node2, py_ast.Str): # py37
|
|
56
57
|
value = node2.s
|
|
57
58
|
else:
|
|
58
|
-
raise NotImplementedError(f
|
|
59
|
+
raise NotImplementedError(f"Unknown dict key {node2}")
|
|
59
60
|
|
|
60
61
|
keys.append(value)
|
|
61
62
|
|
|
@@ -68,11 +69,11 @@ class FuncParser:
|
|
|
68
69
|
if isinstance(node, py_ast.Name):
|
|
69
70
|
# special attributes
|
|
70
71
|
name = node.id
|
|
71
|
-
if name ==
|
|
72
|
+
if name == "true":
|
|
72
73
|
return True
|
|
73
|
-
elif name ==
|
|
74
|
+
elif name == "false":
|
|
74
75
|
return False
|
|
75
|
-
elif name ==
|
|
76
|
+
elif name == "null":
|
|
76
77
|
return None
|
|
77
78
|
|
|
78
79
|
if isinstance(node, py_ast.Constant):
|
|
@@ -92,11 +93,10 @@ class FuncParser:
|
|
|
92
93
|
value = self.process(node.operand)
|
|
93
94
|
return -value
|
|
94
95
|
|
|
95
|
-
raise NotImplementedError(f
|
|
96
|
+
raise NotImplementedError(f"Unknown node {node}")
|
|
96
97
|
|
|
97
98
|
|
|
98
99
|
class APITable:
|
|
99
|
-
|
|
100
100
|
def __init__(self, handler):
|
|
101
101
|
self.handler = handler
|
|
102
102
|
|
|
@@ -154,7 +154,6 @@ class APITable:
|
|
|
154
154
|
|
|
155
155
|
|
|
156
156
|
class APIResource(APITable):
|
|
157
|
-
|
|
158
157
|
def __init__(self, *args, table_name=None, **kwargs):
|
|
159
158
|
self.table_name = table_name
|
|
160
159
|
super().__init__(*args, **kwargs)
|
|
@@ -179,26 +178,18 @@ class APIResource(APITable):
|
|
|
179
178
|
if query.order_by and len(query.order_by) > 0:
|
|
180
179
|
sort = []
|
|
181
180
|
for an_order in query.order_by:
|
|
182
|
-
sort.append(SortColumn(an_order.field.parts[-1],
|
|
183
|
-
an_order.direction.upper() != 'DESC'))
|
|
181
|
+
sort.append(SortColumn(an_order.field.parts[-1], an_order.direction.upper() != "DESC"))
|
|
184
182
|
|
|
185
183
|
targets = []
|
|
186
184
|
for col in query.targets:
|
|
187
185
|
if isinstance(col, Identifier):
|
|
188
186
|
targets.append(col.parts[-1])
|
|
189
187
|
|
|
190
|
-
kwargs = {
|
|
191
|
-
'conditions': conditions,
|
|
192
|
-
'limit': limit,
|
|
193
|
-
'sort': sort,
|
|
194
|
-
'targets': targets
|
|
195
|
-
}
|
|
188
|
+
kwargs = {"conditions": conditions, "limit": limit, "sort": sort, "targets": targets}
|
|
196
189
|
if self.table_name is not None:
|
|
197
|
-
kwargs[
|
|
190
|
+
kwargs["table_name"] = self.table_name
|
|
198
191
|
|
|
199
|
-
result = self.list(
|
|
200
|
-
**kwargs
|
|
201
|
-
)
|
|
192
|
+
result = self.list(**kwargs)
|
|
202
193
|
|
|
203
194
|
filters = []
|
|
204
195
|
for cond in conditions:
|
|
@@ -216,17 +207,18 @@ class APIResource(APITable):
|
|
|
216
207
|
result = sort_dataframe(result, sort_columns)
|
|
217
208
|
|
|
218
209
|
if limit is not None and len(result) > limit:
|
|
219
|
-
result = result[:int(limit)]
|
|
210
|
+
result = result[: int(limit)]
|
|
220
211
|
|
|
221
212
|
return result
|
|
222
213
|
|
|
223
|
-
def list(
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
214
|
+
def list(
|
|
215
|
+
self,
|
|
216
|
+
conditions: List[FilterCondition] = None,
|
|
217
|
+
limit: int = None,
|
|
218
|
+
sort: List[SortColumn] = None,
|
|
219
|
+
targets: List[str] = None,
|
|
220
|
+
**kwargs,
|
|
221
|
+
):
|
|
230
222
|
"""
|
|
231
223
|
List items based on specified conditions, limits, sorting, and targets.
|
|
232
224
|
|
|
@@ -254,13 +246,10 @@ class APIResource(APITable):
|
|
|
254
246
|
|
|
255
247
|
columns = [col.name for col in query.columns]
|
|
256
248
|
|
|
257
|
-
data = [
|
|
258
|
-
dict(zip(columns, a_row))
|
|
259
|
-
for a_row in query.values
|
|
260
|
-
]
|
|
249
|
+
data = [dict(zip(columns, a_row)) for a_row in query.values]
|
|
261
250
|
kwargs = {}
|
|
262
251
|
if self.table_name is not None:
|
|
263
|
-
kwargs[
|
|
252
|
+
kwargs["table_name"] = self.table_name
|
|
264
253
|
|
|
265
254
|
self.add(data, **kwargs)
|
|
266
255
|
|
|
@@ -332,10 +321,105 @@ class APIResource(APITable):
|
|
|
332
321
|
raise NotImplementedError()
|
|
333
322
|
|
|
334
323
|
def _extract_conditions(self, where: ASTNode) -> List[FilterCondition]:
|
|
335
|
-
return [
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
324
|
+
return [FilterCondition(i[1], FilterOperator(i[0].upper()), i[2]) for i in extract_comparison_conditions(where)]
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
class MetaAPIResource(APIResource):
|
|
328
|
+
# TODO: Add a meta_table_info() method in case metadata cannot be retrieved as expected below?
|
|
329
|
+
|
|
330
|
+
def meta_get_tables(self, table_name: str, **kwargs) -> dict:
|
|
331
|
+
"""
|
|
332
|
+
Retrieves table metadata for the API resource.
|
|
333
|
+
|
|
334
|
+
Args:
|
|
335
|
+
table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
|
|
336
|
+
kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
|
|
337
|
+
|
|
338
|
+
Returns:
|
|
339
|
+
Dict: The dictionary should contain the following fields:
|
|
340
|
+
- TABLE_NAME (str): Name of the table.
|
|
341
|
+
- TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional).
|
|
342
|
+
- TABLE_SCHEMA (str): Schema of the table (optional).
|
|
343
|
+
- TABLE_DESCRIPTION (str): Description of the table (optional).
|
|
344
|
+
- ROW_COUNT (int): Estimated number of rows in the table (optional).
|
|
345
|
+
"""
|
|
346
|
+
pass
|
|
347
|
+
|
|
348
|
+
def meta_get_columns(self, table_name: str, **kwargs) -> List[dict]:
|
|
349
|
+
"""
|
|
350
|
+
Retrieves column metadata for the API resource.
|
|
351
|
+
|
|
352
|
+
Args:
|
|
353
|
+
table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
|
|
354
|
+
kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
|
|
355
|
+
|
|
356
|
+
Returns:
|
|
357
|
+
List[dict]: The list should contain dictionaries with the following fields:
|
|
358
|
+
- TABLE_NAME (str): Name of the table.
|
|
359
|
+
- COLUMN_NAME (str): Name of the column.
|
|
360
|
+
- DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc.
|
|
361
|
+
- COLUMN_DESCRIPTION (str): Description of the column (optional).
|
|
362
|
+
- IS_NULLABLE (bool): Whether the column can contain NULL values (optional).
|
|
363
|
+
- COLUMN_DEFAULT (str): Default value of the column (optional).
|
|
364
|
+
"""
|
|
365
|
+
pass
|
|
366
|
+
|
|
367
|
+
def meta_get_column_statistics(self, table_name: str, **kwargs) -> List[dict]:
|
|
368
|
+
"""
|
|
369
|
+
Retrieves column statistics for the API resource.
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
|
|
373
|
+
kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
|
|
374
|
+
|
|
375
|
+
Returns:
|
|
376
|
+
List[dict]: The list should contain dictionaries with the following fields:
|
|
377
|
+
- TABLE_NAME (str): Name of the table.
|
|
378
|
+
- COLUMN_NAME (str): Name of the column.
|
|
379
|
+
- MOST_COMMON_VALUES (List[str]): Most common values in the column (optional).
|
|
380
|
+
- MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional).
|
|
381
|
+
- NULL_PERCENTAGE: Percentage of NULL values in the column (optional).
|
|
382
|
+
- MINIMUM_VALUE (str): Minimum value in the column (optional).
|
|
383
|
+
- MAXIMUM_VALUE (str): Maximum value in the column (optional).
|
|
384
|
+
- DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional).
|
|
385
|
+
"""
|
|
386
|
+
pass
|
|
387
|
+
|
|
388
|
+
def meta_get_primary_keys(self, table_name: str, **kwargs) -> List[dict]:
|
|
389
|
+
"""
|
|
390
|
+
Retrieves primary key metadata for the API resource.
|
|
391
|
+
|
|
392
|
+
Args:
|
|
393
|
+
table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
|
|
394
|
+
kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
|
|
395
|
+
|
|
396
|
+
Returns:
|
|
397
|
+
List[dict]: The list should contain dictionaries with the following fields:
|
|
398
|
+
- TABLE_NAME (str): Name of the table.
|
|
399
|
+
- COLUMN_NAME (str): Name of the column that is part of the primary key.
|
|
400
|
+
- ORDINAL_POSITION (int): Position of the column in the primary key (optional).
|
|
401
|
+
- CONSTRAINT_NAME (str): Name of the primary key constraint (optional).
|
|
402
|
+
"""
|
|
403
|
+
pass
|
|
404
|
+
|
|
405
|
+
def meta_get_foreign_keys(self, table_name: str, all_tables: List[str], **kwargs) -> List[dict]:
|
|
406
|
+
"""
|
|
407
|
+
Retrieves foreign key metadata for the API resource.
|
|
408
|
+
|
|
409
|
+
Args:
|
|
410
|
+
table_name (str): The name given to the table that represents the API resource. This is required because the name for the APIResource is given by the handler.
|
|
411
|
+
all_tables (List[str]): A list of all table names in the API resource. This is used to identify relationships between tables.
|
|
412
|
+
kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
|
|
413
|
+
|
|
414
|
+
Returns:
|
|
415
|
+
List[dict]: The list should contain dictionaries with the following fields:
|
|
416
|
+
- PARENT_TABLE_NAME (str): Name of the parent table.
|
|
417
|
+
- PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key.
|
|
418
|
+
- CHILD_TABLE_NAME (str): Name of the child table.
|
|
419
|
+
- CHILD_COLUMN_NAME (str): Name of the child column that is part of the foreign key.
|
|
420
|
+
- CONSTRAINT_NAME (str): Name of the foreign key constraint (optional).
|
|
421
|
+
"""
|
|
422
|
+
pass
|
|
339
423
|
|
|
340
424
|
|
|
341
425
|
class APIHandler(BaseHandler):
|
|
@@ -368,14 +452,13 @@ class APIHandler(BaseHandler):
|
|
|
368
452
|
"""
|
|
369
453
|
name = name.parts[-1]
|
|
370
454
|
if name not in self._tables:
|
|
371
|
-
raise TableNotFound(f
|
|
455
|
+
raise TableNotFound(f"Table not found: {name}")
|
|
372
456
|
return self._tables[name]
|
|
373
457
|
|
|
374
458
|
def query(self, query: ASTNode):
|
|
375
|
-
|
|
376
459
|
if isinstance(query, Select):
|
|
377
460
|
table = self._get_table(query.from_table)
|
|
378
|
-
if not hasattr(table,
|
|
461
|
+
if not hasattr(table, "list"):
|
|
379
462
|
# for back compatibility, targets wasn't passed in previous version
|
|
380
463
|
query.targets = [Star()]
|
|
381
464
|
result = self._get_table(query.from_table).select(query)
|
|
@@ -406,8 +489,8 @@ class APIHandler(BaseHandler):
|
|
|
406
489
|
|
|
407
490
|
result = self._get_table(Identifier(table_name)).get_columns()
|
|
408
491
|
|
|
409
|
-
df = pd.DataFrame(result, columns=[
|
|
410
|
-
df[
|
|
492
|
+
df = pd.DataFrame(result, columns=["Field"])
|
|
493
|
+
df["Type"] = "str"
|
|
411
494
|
|
|
412
495
|
return Response(RESPONSE_TYPE.TABLE, df)
|
|
413
496
|
|
|
@@ -419,14 +502,135 @@ class APIHandler(BaseHandler):
|
|
|
419
502
|
"""
|
|
420
503
|
result = list(self._tables.keys())
|
|
421
504
|
|
|
422
|
-
df = pd.DataFrame(result, columns=[
|
|
423
|
-
df[
|
|
505
|
+
df = pd.DataFrame(result, columns=["table_name"])
|
|
506
|
+
df["table_type"] = "BASE TABLE"
|
|
424
507
|
|
|
425
508
|
return Response(RESPONSE_TYPE.TABLE, df)
|
|
426
509
|
|
|
427
510
|
|
|
428
|
-
class
|
|
511
|
+
class MetaAPIHandler(APIHandler):
|
|
512
|
+
"""
|
|
513
|
+
Base class for handlers associated with application APIs (e.g. Twitter, Slack, Discord, etc.)
|
|
429
514
|
|
|
515
|
+
This class is used when the handler is also needed to store information in the data catalog.
|
|
516
|
+
"""
|
|
517
|
+
|
|
518
|
+
def meta_get_tables(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
|
|
519
|
+
"""
|
|
520
|
+
Retrieves metadata for the specified tables (or all tables if no list is provided).
|
|
521
|
+
|
|
522
|
+
Args:
|
|
523
|
+
table_names (List): A list of table names for which to retrieve metadata.
|
|
524
|
+
kwargs: Additional keyword arguments that may be used by the specific API resource implementation.
|
|
525
|
+
|
|
526
|
+
Returns:
|
|
527
|
+
Response: A response object containing the table metadata.
|
|
528
|
+
"""
|
|
529
|
+
df = pd.DataFrame()
|
|
530
|
+
for table_name, table_class in self._tables.items():
|
|
531
|
+
if table_names is None or table_name in table_names:
|
|
532
|
+
try:
|
|
533
|
+
if hasattr(table_class, "meta_get_tables"):
|
|
534
|
+
table_metadata = table_class.meta_get_tables(table_name, **kwargs)
|
|
535
|
+
df = pd.concat([df, pd.DataFrame([table_metadata])], ignore_index=True)
|
|
536
|
+
except Exception as e:
|
|
537
|
+
logger.error(f"Error retrieving metadata for table {table_name}: {e}")
|
|
538
|
+
|
|
539
|
+
return Response(RESPONSE_TYPE.TABLE, df)
|
|
540
|
+
|
|
541
|
+
def meta_get_columns(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
|
|
542
|
+
"""
|
|
543
|
+
Retrieves column metadata for the specified tables (or all tables if no list is provided).
|
|
544
|
+
|
|
545
|
+
Args:
|
|
546
|
+
table_names (List): A list of table names for which to retrieve column metadata.
|
|
547
|
+
|
|
548
|
+
Returns:
|
|
549
|
+
Response: A response object containing the column metadata.
|
|
550
|
+
"""
|
|
551
|
+
df = pd.DataFrame()
|
|
552
|
+
for table_name, table_class in self._tables.items():
|
|
553
|
+
if table_names is None or table_name in table_names:
|
|
554
|
+
try:
|
|
555
|
+
if hasattr(table_class, "meta_get_columns"):
|
|
556
|
+
column_metadata = table_class.meta_get_columns(table_name, **kwargs)
|
|
557
|
+
df = pd.concat([df, pd.DataFrame(column_metadata)], ignore_index=True)
|
|
558
|
+
except Exception as e:
|
|
559
|
+
logger.error(f"Error retrieving column metadata for table {table_name}: {e}")
|
|
560
|
+
|
|
561
|
+
return Response(RESPONSE_TYPE.TABLE, df)
|
|
562
|
+
|
|
563
|
+
def meta_get_column_statistics(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
|
|
564
|
+
"""
|
|
565
|
+
Retrieves column statistics for the specified tables (or all tables if no list is provided).
|
|
566
|
+
|
|
567
|
+
Args:
|
|
568
|
+
table_names (List): A list of table names for which to retrieve column statistics.
|
|
569
|
+
|
|
570
|
+
Returns:
|
|
571
|
+
Response: A response object containing the column statistics.
|
|
572
|
+
"""
|
|
573
|
+
df = pd.DataFrame()
|
|
574
|
+
for table_name, table_class in self._tables.items():
|
|
575
|
+
if table_names is None or table_name in table_names:
|
|
576
|
+
try:
|
|
577
|
+
if hasattr(table_class, "meta_get_column_statistics"):
|
|
578
|
+
column_statistics = table_class.meta_get_column_statistics(table_name, **kwargs)
|
|
579
|
+
df = pd.concat([df, pd.DataFrame(column_statistics)], ignore_index=True)
|
|
580
|
+
except Exception as e:
|
|
581
|
+
logger.error(f"Error retrieving column statistics for table {table_name}: {e}")
|
|
582
|
+
|
|
583
|
+
return Response(RESPONSE_TYPE.TABLE, df)
|
|
584
|
+
|
|
585
|
+
def meta_get_primary_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
|
|
586
|
+
"""
|
|
587
|
+
Retrieves primary key metadata for the specified tables (or all tables if no list is provided).
|
|
588
|
+
|
|
589
|
+
Args:
|
|
590
|
+
table_names (List): A list of table names for which to retrieve primary key metadata.
|
|
591
|
+
|
|
592
|
+
Returns:
|
|
593
|
+
Response: A response object containing the primary key metadata.
|
|
594
|
+
"""
|
|
595
|
+
df = pd.DataFrame()
|
|
596
|
+
for table_name, table_class in self._tables.items():
|
|
597
|
+
if table_names is None or table_name in table_names:
|
|
598
|
+
try:
|
|
599
|
+
if hasattr(table_class, "meta_get_primary_keys"):
|
|
600
|
+
primary_key_metadata = table_class.meta_get_primary_keys(table_name, **kwargs)
|
|
601
|
+
df = pd.concat([df, pd.DataFrame(primary_key_metadata)], ignore_index=True)
|
|
602
|
+
except Exception as e:
|
|
603
|
+
logger.error(f"Error retrieving primary keys for table {table_name}: {e}")
|
|
604
|
+
|
|
605
|
+
return Response(RESPONSE_TYPE.TABLE, df)
|
|
606
|
+
|
|
607
|
+
def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
|
|
608
|
+
"""
|
|
609
|
+
Retrieves foreign key metadata for the specified tables (or all tables if no list is provided).
|
|
610
|
+
|
|
611
|
+
Args:
|
|
612
|
+
table_names (List): A list of table names for which to retrieve foreign key metadata.
|
|
613
|
+
|
|
614
|
+
Returns:
|
|
615
|
+
Response: A response object containing the foreign key metadata.
|
|
616
|
+
"""
|
|
617
|
+
df = pd.DataFrame()
|
|
618
|
+
all_tables = list(self._tables.keys())
|
|
619
|
+
for table_name, table_class in self._tables.items():
|
|
620
|
+
if table_names is None or table_name in table_names:
|
|
621
|
+
try:
|
|
622
|
+
if hasattr(table_class, "meta_get_foreign_keys"):
|
|
623
|
+
foreign_key_metadata = table_class.meta_get_foreign_keys(
|
|
624
|
+
table_name, all_tables=table_names if table_names else all_tables, **kwargs
|
|
625
|
+
)
|
|
626
|
+
df = pd.concat([df, pd.DataFrame(foreign_key_metadata)], ignore_index=True)
|
|
627
|
+
except Exception as e:
|
|
628
|
+
logger.error(f"Error retrieving foreign keys for table {table_name}: {e}")
|
|
629
|
+
|
|
630
|
+
return Response(RESPONSE_TYPE.TABLE, df)
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
class APIChatHandler(APIHandler):
|
|
430
634
|
def get_chat_config(self):
|
|
431
635
|
"""Return configuration to connect to chatbot
|
|
432
636
|
|
|
@@ -14,14 +14,14 @@ logger = log.getLogger(__name__)
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class BaseHandler:
|
|
17
|
-
"""
|
|
17
|
+
"""Base class for database handlers
|
|
18
18
|
|
|
19
19
|
Base class for handlers that associate a source of information with the
|
|
20
20
|
broader MindsDB ecosystem via SQL commands.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
def __init__(self, name: str):
|
|
24
|
-
"""
|
|
24
|
+
"""constructor
|
|
25
25
|
Args:
|
|
26
26
|
name (str): the handler name
|
|
27
27
|
"""
|
|
@@ -29,7 +29,7 @@ class BaseHandler:
|
|
|
29
29
|
self.name = name
|
|
30
30
|
|
|
31
31
|
def connect(self):
|
|
32
|
-
"""
|
|
32
|
+
"""Set up any connections required by the handler
|
|
33
33
|
|
|
34
34
|
Should return connection
|
|
35
35
|
|
|
@@ -37,7 +37,7 @@ class BaseHandler:
|
|
|
37
37
|
raise NotImplementedError()
|
|
38
38
|
|
|
39
39
|
def disconnect(self):
|
|
40
|
-
"""
|
|
40
|
+
"""Close any existing connections
|
|
41
41
|
|
|
42
42
|
Should switch self.is_connected.
|
|
43
43
|
"""
|
|
@@ -45,7 +45,7 @@ class BaseHandler:
|
|
|
45
45
|
return
|
|
46
46
|
|
|
47
47
|
def check_connection(self) -> HandlerStatusResponse:
|
|
48
|
-
"""
|
|
48
|
+
"""Check connection to the handler
|
|
49
49
|
|
|
50
50
|
Returns:
|
|
51
51
|
HandlerStatusResponse
|
|
@@ -77,7 +77,7 @@ class BaseHandler:
|
|
|
77
77
|
raise NotImplementedError()
|
|
78
78
|
|
|
79
79
|
def get_tables(self) -> HandlerResponse:
|
|
80
|
-
"""
|
|
80
|
+
"""Return list of entities
|
|
81
81
|
|
|
82
82
|
Return list of entities that will be accessible as tables.
|
|
83
83
|
|
|
@@ -89,7 +89,7 @@ class BaseHandler:
|
|
|
89
89
|
raise NotImplementedError()
|
|
90
90
|
|
|
91
91
|
def get_columns(self, table_name: str) -> HandlerResponse:
|
|
92
|
-
"""
|
|
92
|
+
"""Returns a list of entity columns
|
|
93
93
|
|
|
94
94
|
Args:
|
|
95
95
|
table_name (str): name of one of tables returned by self.get_tables()
|
|
@@ -113,6 +113,91 @@ class DatabaseHandler(BaseHandler):
|
|
|
113
113
|
super().__init__(name)
|
|
114
114
|
|
|
115
115
|
|
|
116
|
+
class MetaDatabaseHandler(DatabaseHandler):
|
|
117
|
+
"""
|
|
118
|
+
Base class for handlers associated with data storage systems (e.g. databases, data warehouses, streaming services, etc.)
|
|
119
|
+
|
|
120
|
+
This class is used when the handler is also needed to store information in the data catalog.
|
|
121
|
+
This information is typically available in the information schema or system tables of the database.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
def __init__(self, name: str):
|
|
125
|
+
super().__init__(name)
|
|
126
|
+
|
|
127
|
+
def meta_get_tables(self, table_names: Optional[List[str]]) -> HandlerResponse:
|
|
128
|
+
"""
|
|
129
|
+
Returns metadata information about the tables to be stored in the data catalog.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
HandlerResponse: The response should consist of the following columns:
|
|
133
|
+
- TABLE_NAME (str): Name of the table.
|
|
134
|
+
- TABLE_TYPE (str): Type of the table, e.g. 'BASE TABLE', 'VIEW', etc. (optional).
|
|
135
|
+
- TABLE_SCHEMA (str): Schema of the table (optional).
|
|
136
|
+
- TABLE_DESCRIPTION (str): Description of the table (optional).
|
|
137
|
+
- ROW_COUNT (int): Estimated number of rows in the table (optional).
|
|
138
|
+
"""
|
|
139
|
+
raise NotImplementedError()
|
|
140
|
+
|
|
141
|
+
def meta_get_columns(self, table_names: Optional[List[str]]) -> HandlerResponse:
|
|
142
|
+
"""
|
|
143
|
+
Returns metadata information about the columns in the tables to be stored in the data catalog.
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
HandlerResponse: The response should consist of the following columns:
|
|
147
|
+
- TABLE_NAME (str): Name of the table.
|
|
148
|
+
- COLUMN_NAME (str): Name of the column.
|
|
149
|
+
- DATA_TYPE (str): Data type of the column, e.g. 'VARCHAR', 'INT', etc.
|
|
150
|
+
- COLUMN_DESCRIPTION (str): Description of the column (optional).
|
|
151
|
+
- IS_NULLABLE (bool): Whether the column can contain NULL values (optional).
|
|
152
|
+
- COLUMN_DEFAULT (str): Default value of the column (optional).
|
|
153
|
+
"""
|
|
154
|
+
raise NotImplementedError()
|
|
155
|
+
|
|
156
|
+
def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> HandlerResponse:
|
|
157
|
+
"""
|
|
158
|
+
Returns statistical metadata about the columns in the tables to be stored in the data catalog.
|
|
159
|
+
|
|
160
|
+
Returns:
|
|
161
|
+
HandlerResponse: The response should consist of the following columns:
|
|
162
|
+
- TABLE_NAME (str): Name of the table.
|
|
163
|
+
- COLUMN_NAME (str): Name of the column.
|
|
164
|
+
- MOST_COMMON_VALUES (List[str]): Most common values in the column (optional).
|
|
165
|
+
- MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional).
|
|
166
|
+
- NULL_PERCENTAGE: Percentage of NULL values in the column (optional).
|
|
167
|
+
- MINIMUM_VALUE (str): Minimum value in the column (optional).
|
|
168
|
+
- MAXIMUM_VALUE (str): Maximum value in the column (optional).
|
|
169
|
+
- DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional).
|
|
170
|
+
"""
|
|
171
|
+
raise NotImplementedError()
|
|
172
|
+
|
|
173
|
+
def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> HandlerResponse:
|
|
174
|
+
"""
|
|
175
|
+
Returns metadata information about the primary keys in the tables to be stored in the data catalog.
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
HandlerResponse: The response should consist of the following columns:
|
|
179
|
+
- TABLE_NAME (str): Name of the table.
|
|
180
|
+
- COLUMN_NAME (str): Name of the column that is part of the primary key.
|
|
181
|
+
- ORDINAL_POSITION (int): Position of the column in the primary key (optional).
|
|
182
|
+
- CONSTRAINT_NAME (str): Name of the primary key constraint (optional).
|
|
183
|
+
"""
|
|
184
|
+
raise NotImplementedError()
|
|
185
|
+
|
|
186
|
+
def meta_get_foreign_keys(self, table_names: Optional[List[str]]) -> HandlerResponse:
|
|
187
|
+
"""
|
|
188
|
+
Returns metadata information about the foreign keys in the tables to be stored in the data catalog.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
HandlerResponse: The response should consist of the following columns:
|
|
192
|
+
- PARENT_TABLE_NAME (str): Name of the parent table.
|
|
193
|
+
- PARENT_COLUMN_NAME (str): Name of the parent column that is part of the foreign key.
|
|
194
|
+
- CHILD_TABLE_NAME (str): Name of the child table.
|
|
195
|
+
- CHILD_COLUMN_NAME (str): Name of the child column that is part of the foreign key.
|
|
196
|
+
- CONSTRAINT_NAME (str): Name of the foreign key constraint (optional).
|
|
197
|
+
"""
|
|
198
|
+
raise NotImplementedError()
|
|
199
|
+
|
|
200
|
+
|
|
116
201
|
class ArgProbeMixin:
|
|
117
202
|
"""
|
|
118
203
|
A mixin class that provides probing of arguments that
|
|
@@ -154,26 +239,16 @@ class ArgProbeMixin:
|
|
|
154
239
|
self.visit(node.value)
|
|
155
240
|
|
|
156
241
|
def visit_Subscript(self, node):
|
|
157
|
-
if (
|
|
158
|
-
isinstance(node.value, ast.
|
|
159
|
-
and node.value.id in self.var_names_to_track
|
|
160
|
-
):
|
|
161
|
-
if isinstance(node.slice, ast.Index) and isinstance(
|
|
162
|
-
node.slice.value, ast.Str
|
|
163
|
-
):
|
|
242
|
+
if isinstance(node.value, ast.Name) and node.value.id in self.var_names_to_track:
|
|
243
|
+
if isinstance(node.slice, ast.Index) and isinstance(node.slice.value, ast.Str):
|
|
164
244
|
self.arg_keys.append({"name": node.slice.value.s, "required": True})
|
|
165
245
|
self.generic_visit(node)
|
|
166
246
|
|
|
167
247
|
def visit_Call(self, node):
|
|
168
248
|
if isinstance(node.func, ast.Attribute) and node.func.attr == "get":
|
|
169
|
-
if (
|
|
170
|
-
isinstance(node.func.value, ast.Name)
|
|
171
|
-
and node.func.value.id in self.var_names_to_track
|
|
172
|
-
):
|
|
249
|
+
if isinstance(node.func.value, ast.Name) and node.func.value.id in self.var_names_to_track:
|
|
173
250
|
if isinstance(node.args[0], ast.Str):
|
|
174
|
-
self.arg_keys.append(
|
|
175
|
-
{"name": node.args[0].s, "required": False}
|
|
176
|
-
)
|
|
251
|
+
self.arg_keys.append({"name": node.args[0].s, "required": False})
|
|
177
252
|
self.generic_visit(node)
|
|
178
253
|
|
|
179
254
|
@classmethod
|
|
@@ -197,9 +272,7 @@ class ArgProbeMixin:
|
|
|
197
272
|
try:
|
|
198
273
|
source_code = self.get_source_code(method_name)
|
|
199
274
|
except Exception as e:
|
|
200
|
-
logger.error(
|
|
201
|
-
f"Failed to get source code of method {method_name} in {self.__class__.__name__}. Reason: {e}"
|
|
202
|
-
)
|
|
275
|
+
logger.error(f"Failed to get source code of method {method_name} in {self.__class__.__name__}. Reason: {e}")
|
|
203
276
|
return []
|
|
204
277
|
|
|
205
278
|
# parse the source code
|
|
@@ -238,9 +311,7 @@ class ArgProbeMixin:
|
|
|
238
311
|
"""
|
|
239
312
|
method = getattr(self, method_name)
|
|
240
313
|
if method is None:
|
|
241
|
-
raise Exception(
|
|
242
|
-
f"Method {method_name} does not exist in {self.__class__.__name__}"
|
|
243
|
-
)
|
|
314
|
+
raise Exception(f"Method {method_name} does not exist in {self.__class__.__name__}")
|
|
244
315
|
source_code = inspect.getsource(method)
|
|
245
316
|
return source_code
|
|
246
317
|
|
|
@@ -288,8 +359,8 @@ class BaseMLEngine(ArgProbeMixin):
|
|
|
288
359
|
self.engine_storage = engine_storage
|
|
289
360
|
self.generative = False # if True, the target column name does not have to be specified at creation time
|
|
290
361
|
|
|
291
|
-
if kwargs.get(
|
|
292
|
-
self.base_model_storage = kwargs[
|
|
362
|
+
if kwargs.get("base_model_storage"):
|
|
363
|
+
self.base_model_storage = kwargs["base_model_storage"] # available when updating a model
|
|
293
364
|
else:
|
|
294
365
|
self.base_model_storage = None
|
|
295
366
|
|