MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +50 -26
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/a2a/task_manager.py +68 -6
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +29 -33
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +145 -40
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +352 -83
- mindsdb/integrations/libs/api_handler.py +279 -57
- mindsdb/integrations/libs/base.py +185 -30
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +226 -196
- mindsdb/interfaces/agents/constants.py +8 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +375 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +38 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +222 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +66 -25
- mindsdb/interfaces/knowledge_base/evaluate.py +516 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +31 -36
- mindsdb/interfaces/skills/sql_agent.py +113 -86
- mindsdb/interfaces/storage/db.py +242 -82
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +13 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +40 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +257 -257
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +76 -68
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0

mindsdb/integrations/handlers/postgres_handler/postgres_handler.py

@@ -1,3 +1,5 @@
+import csv
+import io
 import time
 import json
 from typing import Optional, Any
@@ -13,12 +15,12 @@ from mindsdb_sql_parser import parse_sql
 from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
 from mindsdb_sql_parser.ast.base import ASTNode
 
-from mindsdb.integrations.libs.base import DatabaseHandler
+from mindsdb.integrations.libs.base import MetaDatabaseHandler
 from mindsdb.utilities import log
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
     HandlerResponse as Response,
-    RESPONSE_TYPE
+    RESPONSE_TYPE,
 )
 import mindsdb.utilities.profiler as profiler
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
@@ -44,20 +46,21 @@ def _map_type(internal_type_name: str | None) -> MYSQL_DATA_TYPE:
 
     internal_type_name = internal_type_name.lower()
     types_map = {
-        (
-        (
-        (
-        (
-        (
-        (
-        (
+        ("smallint", "smallserial"): MYSQL_DATA_TYPE.SMALLINT,
+        ("integer", "int", "serial"): MYSQL_DATA_TYPE.INT,
+        ("bigint", "bigserial"): MYSQL_DATA_TYPE.BIGINT,
+        ("real", "float"): MYSQL_DATA_TYPE.FLOAT,
+        ("numeric", "decimal"): MYSQL_DATA_TYPE.DECIMAL,
+        ("double precision",): MYSQL_DATA_TYPE.DOUBLE,
+        ("character varying", "varchar"): MYSQL_DATA_TYPE.VARCHAR,
         # NOTE: if return chars-types as mysql's CHAR, then response will be padded with spaces, so return as TEXT
-        (
-        (
-        (
-        (
-        (
-        (
+        ("money", "character", "char", "bpchar", "text"): MYSQL_DATA_TYPE.TEXT,
+        ("timestamp", "timestamp without time zone", "timestamp with time zone"): MYSQL_DATA_TYPE.DATETIME,
+        ("date",): MYSQL_DATA_TYPE.DATE,
+        ("time", "time without time zone", "time with time zone"): MYSQL_DATA_TYPE.TIME,
+        ("boolean",): MYSQL_DATA_TYPE.BOOL,
+        ("bytea",): MYSQL_DATA_TYPE.BINARY,
+        ("json", "jsonb"): MYSQL_DATA_TYPE.JSON,
     }
 
     for db_types_list, mysql_data_type in types_map.items():
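The tuple-keyed map above is resolved by the loop that follows it (`for db_types_list, mysql_data_type in types_map.items(): ...`): the first tuple containing the lowered type name wins. Below is a minimal standalone sketch of the same lookup pattern; the `MysqlType` enum is a stand-in for MindsDB's `MYSQL_DATA_TYPE`, and the TEXT fallback mirrors how the handler treats unknown types.

    from enum import Enum, auto


    class MysqlType(Enum):
        # Stand-in for mindsdb's MYSQL_DATA_TYPE enum.
        INT = auto()
        TEXT = auto()


    def map_type(internal_type_name: str) -> MysqlType:
        # Tuple keys group all Postgres spellings that share one MySQL type.
        types_map = {
            ("integer", "int", "serial"): MysqlType.INT,
            ("character varying", "varchar", "text"): MysqlType.TEXT,
        }
        internal_type_name = internal_type_name.lower()
        for db_types_list, mysql_data_type in types_map.items():
            if internal_type_name in db_types_list:
                return mysql_data_type
        return MysqlType.TEXT  # unknown types degrade to TEXT, as in the handler


    assert map_type("VARCHAR") is MysqlType.TEXT
    assert map_type("serial") is MysqlType.INT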
@@ -81,10 +84,28 @@ def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response:
     description: list[PGColumn] = cursor.description
     mysql_types: list[MYSQL_DATA_TYPE] = []
     for column in description:
+        if column.type_display == "vector":
+            # 'vector' is type of pgvector extension, added here as text to not import pgvector
+            # NOTE: data returned as numpy array
+            mysql_types.append(MYSQL_DATA_TYPE.VECTOR)
+            continue
         pg_type_info: TypeInfo = pg_types.get(column.type_code)
         if pg_type_info is None:
-
-
+            # postgres may return 'polymorphic type', which are not present in the pg_types
+            # list of 'polymorphic type' can be obtained:
+            # SELECT oid, typname, typcategory FROM pg_type WHERE typcategory = 'P' ORDER BY oid;
+            if column.type_code in (2277, 5078):
+                # anyarray, anycompatiblearray
+                regtype = "json"
+            else:
+                logger.warning(f"Postgres handler: unknown type: {column.type_code}")
+                mysql_types.append(MYSQL_DATA_TYPE.TEXT)
+                continue
+        elif pg_type_info.array_oid == column.type_code:
+            # it is any array, handle is as json
+            regtype: str = "json"
+        else:
+            regtype: str = pg_type_info.regtype if pg_type_info is not None else None
         mysql_type = _map_type(regtype)
         mysql_types.append(mysql_type)
 
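`pg_types` here is psycopg 3's OID registry (`psycopg.postgres.types`); each `TypeInfo` it returns exposes the `regtype` string that `_map_type` consumes, plus `array_oid`, which is how the code above detects array columns and routes them to JSON. A small sketch of that lookup, assuming psycopg 3 is installed (23 is the standard Postgres OID for int4):

    from psycopg.postgres import types as pg_types  # OID -> TypeInfo registry

    info = pg_types.get(23)  # look up a column's type_code (an OID)
    print(info.name, info.regtype, info.array_oid)  # e.g. int4 integer 1007

    # A cursor column whose type_code equals a TypeInfo's array_oid is an
    # array column; the handler maps those to JSON instead of a scalar type.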
@@ -93,38 +114,37 @@ def _make_table_response(result: list[tuple[Any]], cursor: Cursor) -> Response:
     for i, mysql_type in enumerate(mysql_types):
         expected_dtype = None
         if mysql_type in (
-            MYSQL_DATA_TYPE.SMALLINT,
-            MYSQL_DATA_TYPE.
+            MYSQL_DATA_TYPE.SMALLINT,
+            MYSQL_DATA_TYPE.INT,
+            MYSQL_DATA_TYPE.MEDIUMINT,
+            MYSQL_DATA_TYPE.BIGINT,
+            MYSQL_DATA_TYPE.TINYINT,
         ):
-            expected_dtype =
+            expected_dtype = "Int64"
         elif mysql_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN):
-            expected_dtype =
+            expected_dtype = "boolean"
         serieses.append(pd.Series([row[i] for row in result], dtype=expected_dtype, name=description[i].name))
     df = pd.concat(serieses, axis=1, copy=False)
     # endregion
 
-    return Response(
-        RESPONSE_TYPE.TABLE,
-        data_frame=df,
-        affected_rows=cursor.rowcount,
-        mysql_types=mysql_types
-    )
+    return Response(RESPONSE_TYPE.TABLE, data_frame=df, affected_rows=cursor.rowcount, mysql_types=mysql_types)
 
 
-class PostgresHandler(DatabaseHandler):
+class PostgresHandler(MetaDatabaseHandler):
     """
     This handler handles connection and execution of the PostgreSQL statements.
     """
-    name = 'postgres'
 
-
+    name = "postgres"
+
+    @profiler.profile("init_pg_handler")
     def __init__(self, name=None, **kwargs):
         super().__init__(name)
         self.parser = parse_sql
-        self.connection_args = kwargs.get(
-        self.dialect =
-        self.database = self.connection_args.get(
-        self.renderer = SqlalchemyRender(
+        self.connection_args = kwargs.get("connection_data")
+        self.dialect = "postgresql"
+        self.database = self.connection_args.get("database")
+        self.renderer = SqlalchemyRender("postgres")
 
         self.connection = None
         self.is_connected = False
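The `expected_dtype` strings are pandas' nullable extension dtypes: "Int64" (capital I) and "boolean" keep SQL NULLs as `pd.NA` instead of silently promoting integer columns to float or object. A quick illustration:

    import pandas as pd

    # A plain numpy int column cannot hold NULL: None forces float64.
    print(pd.Series([1, 2, None]).dtype)  # float64

    # Nullable extension dtypes keep integers exact and show NULL as <NA>.
    print(pd.Series([1, 2, None], dtype="Int64").dtype)    # Int64
    print(pd.Series([True, None], dtype="boolean").dtype)  # boolean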
@@ -136,30 +156,30 @@ class PostgresHandler(DatabaseHandler):
 
     def _make_connection_args(self):
         config = {
-
-
-
-
-
+            "host": self.connection_args.get("host"),
+            "port": self.connection_args.get("port"),
+            "user": self.connection_args.get("user"),
+            "password": self.connection_args.get("password"),
+            "dbname": self.connection_args.get("database"),
         }
 
         # https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS
-        connection_parameters = self.connection_args.get(
+        connection_parameters = self.connection_args.get("connection_parameters")
         if isinstance(connection_parameters, dict) is False:
             connection_parameters = {}
-        if
-            connection_parameters[
+        if "connect_timeout" not in connection_parameters:
+            connection_parameters["connect_timeout"] = 10
         config.update(connection_parameters)
 
-        if self.connection_args.get(
-            config[
+        if self.connection_args.get("sslmode"):
+            config["sslmode"] = self.connection_args.get("sslmode")
 
-        if self.connection_args.get(
-            config[
+        if self.connection_args.get("autocommit"):
+            config["autocommit"] = self.connection_args.get("autocommit")
 
         # If schema is not provided set public as default one
-        if self.connection_args.get(
-            config[
+        if self.connection_args.get("schema"):
+            config["options"] = f"-c search_path={self.connection_args.get('schema')},public"
         return config
 
     @profiler.profile()
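The dict built by `_make_connection_args` is ultimately passed to `psycopg.connect(...)` as libpq keyword parameters, with the schema applied via the `options` flag so unqualified names resolve in that schema first. A sketch of the equivalent direct call; host, credentials, and schema are placeholders, not values from the package:

    import psycopg

    config = {
        "host": "localhost",    # placeholder connection details
        "port": 5432,
        "user": "mindsdb",
        "password": "secret",
        "dbname": "demo",
        "connect_timeout": 10,  # the handler's default when not overridden
        # Set when a 'schema' arg is present:
        "options": "-c search_path=myschema,public",
    }

    with psycopg.connect(**config) as conn:
        row = conn.execute("select current_schema()").fetchone()
        print(row)  # ('myschema',)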
@@ -182,7 +202,7 @@ class PostgresHandler(DatabaseHandler):
             self.is_connected = True
             return self.connection
         except psycopg.Error as e:
-            logger.error(f
+            logger.error(f"Error connecting to PostgreSQL {self.database}, {e}!")
             self.is_connected = False
             raise
 
@@ -209,10 +229,10 @@ class PostgresHandler(DatabaseHandler):
             connection = self.connect()
             with connection.cursor() as cur:
                 # Execute a simple query to test the connection
-                cur.execute(
+                cur.execute("select 1;")
             response.success = True
         except psycopg.Error as e:
-            logger.error(f
+            logger.error(f"Error connecting to PostgreSQL {self.database}, {e}!")
             response.error_message = str(e)
 
         if response.success and need_to_close:
@@ -239,25 +259,25 @@ class PostgresHandler(DatabaseHandler):
             description (list): psycopg cursor description
         """
         types_map = {
-
-
-
-
-
-
+            "int2": "int16",
+            "int4": "int32",
+            "int8": "int64",
+            "numeric": "float64",
+            "float4": "float32",
+            "float8": "float64",
         }
         columns = df.columns
         df.columns = list(range(len(columns)))
         for column_index, column_name in enumerate(df.columns):
             col = df[column_name]
-            if str(col.dtype) ==
-                pg_type_info: TypeInfo = pg_types.get(description[column_index].type_code)
+            if str(col.dtype) == "object":
+                pg_type_info: TypeInfo = pg_types.get(description[column_index].type_code)  # type_code is int!?
                 if pg_type_info is not None and pg_type_info.name in types_map:
-                    col = col.fillna(0)
+                    col = col.fillna(0)  # TODO rework
                     try:
                         df[column_name] = col.astype(types_map[pg_type_info.name])
                     except ValueError as e:
-                        logger.error(f
+                        logger.error(f"Error casting column {col.name} to {types_map[pg_type_info.name]}: {e}")
         df.columns = columns
 
     @profiler.profile()
@@ -287,12 +307,8 @@ class PostgresHandler(DatabaseHandler):
                 response = _make_table_response(result, cur)
                 connection.commit()
             except Exception as e:
-                logger.error(f
-                response = Response(
-                    RESPONSE_TYPE.ERROR,
-                    error_code=0,
-                    error_message=str(e)
-                )
+                logger.error(f"Error running query: {query} on {self.database}, {e}!")
+                response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
                 connection.rollback()
 
         if need_to_close:
@@ -325,10 +341,7 @@ class PostgresHandler(DatabaseHandler):
                 result = cur.fetchmany(fetch_size)
                 if not result:
                     break
-                df = DataFrame(
-                    result,
-                    columns=[x.name for x in cur.description]
-                )
+                df = DataFrame(result, columns=[x.name for x in cur.description])
                 self._cast_dtypes(df, cur.description)
                 yield df
             connection.commit()
@@ -349,16 +362,10 @@ class PostgresHandler(DatabaseHandler):
 
         # copy requires precise cases of names: get current column names from table and adapt input dataframe columns
        if resp.data_frame is not None and not resp.data_frame.empty:
-            db_columns = {
-                c.lower(): c
-                for c in resp.data_frame['COLUMN_NAME']
-            }
+            db_columns = {c.lower(): c for c in resp.data_frame["COLUMN_NAME"]}
 
            # try to get case of existing column
-            columns = [
-                db_columns.get(c.lower(), c)
-                for c in columns
-            ]
+            columns = [db_columns.get(c.lower(), c) for c in columns]
 
        columns = [f'"{c}"' for c in columns]
        rowcount = None
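Quoted identifiers make COPY case-sensitive, hence the lower-cased lookup table above. The remapping in isolation, with hypothetical column names (in the handler they come from information_schema's COLUMN_NAME):

    db_columns = {c.lower(): c for c in ["ID", "CreatedAt", "name"]}

    incoming = ["id", "createdat", "Name", "extra_col"]
    # Prefer the table's exact casing when known; keep the input name otherwise.
    columns = [db_columns.get(c.lower(), c) for c in incoming]
    print(columns)                      # ['ID', 'CreatedAt', 'name', 'extra_col']
    print([f'"{c}"' for c in columns])  # quoted identifiers for the COPY statement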
@@ -370,7 +377,7 @@ class PostgresHandler(DatabaseHandler):
 
                 connection.commit()
             except Exception as e:
-                logger.error(f
+                logger.error(f"Error running insert to {table_name} on {self.database}, {e}!")
                 connection.rollback()
                 raise e
         rowcount = cur.rowcount
@@ -402,9 +409,9 @@ class PostgresHandler(DatabaseHandler):
         Returns:
             Response: A response object containing the list of tables and views, formatted as per the `Response` class.
         """
-        all_filter =
+        all_filter = "and table_schema = current_schema()"
         if all is True:
-            all_filter =
+            all_filter = ""
         query = f"""
             SELECT
                 table_schema,
@@ -439,7 +446,7 @@ class PostgresHandler(DatabaseHandler):
         if isinstance(schema_name, str):
             schema_name = f"'{schema_name}'"
         else:
-            schema_name =
+            schema_name = "current_schema()"
         query = f"""
             SELECT
                 COLUMN_NAME,
@@ -467,33 +474,33 @@ class PostgresHandler(DatabaseHandler):
 
     def subscribe(self, stop_event, callback, table_name, columns=None, **kwargs):
         config = self._make_connection_args()
-        config[
+        config["autocommit"] = True
 
         conn = psycopg.connect(connect_timeout=10, **config)
 
         # create db trigger
-        trigger_name = f
+        trigger_name = f"mdb_notify_{table_name}"
 
-        before, after =
+        before, after = "", ""
 
         if columns:
             # check column exist
-            conn.execute(f
+            conn.execute(f"select {','.join(columns)} from {table_name} limit 0")
 
             columns = set(columns)
-            trigger_name +=
+            trigger_name += "_" + "_".join(columns)
 
             news, olds = [], []
             for column in columns:
-                news.append(f
-                olds.append(f
+                news.append(f"NEW.{column}")
+                olds.append(f"OLD.{column}")
 
-            before = f
-            after =
+            before = f"IF ({', '.join(news)}) IS DISTINCT FROM ({', '.join(olds)}) then\n"
+            after = "\nEND IF;"
         else:
             columns = set()
 
-        func_code = f
+        func_code = f"""
            CREATE OR REPLACE FUNCTION {trigger_name}()
            RETURNS trigger AS $$
            DECLARE
@@ -504,16 +511,16 @@ class PostgresHandler(DatabaseHandler):
            RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;
-
+        """
         conn.execute(func_code)
 
         # for after update - new and old have the same values
-        conn.execute(f
+        conn.execute(f"""
            CREATE OR REPLACE TRIGGER {trigger_name}
            BEFORE INSERT OR UPDATE ON {table_name}
            FOR EACH ROW
            EXECUTE PROCEDURE {trigger_name}();
-
+        """)
         conn.commit()
 
         # start listen
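The "# start listen" step relies on Postgres LISTEN/NOTIFY: the installed trigger raises a notification, and the handler loop consumes it. A minimal sketch of that consumer side with psycopg 3; the DSN and channel name are hypothetical placeholders:

    import psycopg

    # autocommit is needed so LISTEN takes effect immediately.
    conn = psycopg.connect("dbname=demo", autocommit=True)
    conn.execute("LISTEN mdb_events")

    # Another session can now fire: NOTIFY mdb_events, 'payload';
    for notify in conn.notifies():  # generator blocks until a notification arrives
        print(notify.channel, notify.payload)
        break  # a real loop would keep consuming until a stop event

    conn.close()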
@@ -544,8 +551,208 @@ class PostgresHandler(DatabaseHandler):
                 time.sleep(SUBSCRIBE_SLEEP_INTERVAL)
 
         finally:
-            conn.execute(f
-            conn.execute(f
+            conn.execute(f"drop TRIGGER {trigger_name} on {table_name}")
+            conn.execute(f"drop FUNCTION {trigger_name}")
             conn.commit()
 
             conn.close()
+
+    def meta_get_tables(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves metadata information about the tables in the PostgreSQL database to be stored in the data catalog.
+
+        Args:
+            table_names (list): A list of table names for which to retrieve metadata information.
+
+        Returns:
+            Response: A response object containing the metadata information, formatted as per the `Response` class.
+        """
+        query = """
+            SELECT
+                t.table_name,
+                t.table_schema,
+                t.table_type,
+                obj_description(pgc.oid, 'pg_class') AS table_description,
+                pgc.reltuples AS row_count
+            FROM information_schema.tables t
+            JOIN pg_catalog.pg_class pgc ON pgc.relname = t.table_name
+            JOIN pg_catalog.pg_namespace pgn ON pgn.oid = pgc.relnamespace
+            WHERE t.table_schema = current_schema()
+            AND t.table_type in ('BASE TABLE', 'VIEW')
+            AND t.table_name NOT LIKE 'pg_%'
+            AND t.table_name NOT LIKE 'sql_%'
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND t.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_columns(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves column metadata for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve column metadata.
+
+        Returns:
+            Response: A response object containing the column metadata.
+        """
+        query = """
+            SELECT
+                c.table_name,
+                c.column_name,
+                c.data_type,
+                col_description(pgc.oid, c.ordinal_position) AS column_description,
+                c.column_default,
+                (c.is_nullable = 'YES') AS is_nullable
+            FROM information_schema.columns c
+            JOIN pg_catalog.pg_class pgc ON pgc.relname = c.table_name
+            JOIN pg_catalog.pg_namespace pgn ON pgn.oid = pgc.relnamespace
+            WHERE c.table_schema = current_schema()
+            AND pgc.relkind = 'r' -- Only consider regular tables (avoids indexes, sequences, etc.)
+            AND c.table_name NOT LIKE 'pg_%'
+            AND c.table_name NOT LIKE 'sql_%'
+            AND pgn.nspname = c.table_schema
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND c.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_column_statistics(self, table_names: Optional[list] = None) -> dict:
+        """
+        Retrieves column statistics (e.g., most common values, frequencies, null percentage, and distinct value count)
+        for the specified tables or all tables if no list is provided.
+
+        Args:
+            table_names (list): A list of table names for which to retrieve column statistics.
+
+        Returns:
+            dict: A dictionary containing the column statistics.
+        """
+        query = """
+            SELECT
+                ps.attname AS column_name,
+                ps.tablename AS table_name,
+                ps.most_common_vals AS most_common_values,
+                ps.most_common_freqs::text AS most_common_frequencies,
+                ps.null_frac * 100 AS null_percentage,
+                ps.n_distinct AS distinct_values_count,
+                ps.histogram_bounds AS histogram_bounds
+            FROM pg_stats ps
+            WHERE ps.schemaname = current_schema()
+            AND ps.tablename NOT LIKE 'pg_%'
+            AND ps.tablename NOT LIKE 'sql_%'
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND ps.tablename IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        df = result.data_frame
+
+        def parse_pg_array_string(x):
+            try:
+                return (
+                    [item.strip(" ,") for row in csv.reader(io.StringIO(x.strip("{}"))) for item in row if item.strip()]
+                    if x
+                    else []
+                )
+            except IndexError:
+                logger.error(f"Error parsing PostgreSQL array string: {x}")
+                return []
+
+        # Convert most_common_values and most_common_frequencies from string representation to lists.
+        df["most_common_values"] = df["most_common_values"].apply(lambda x: parse_pg_array_string(x))
+        df["most_common_frequencies"] = df["most_common_frequencies"].apply(lambda x: parse_pg_array_string(x))
+
+        # Get the minimum and maximum values from the histogram bounds.
+        df["minimum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[0] if x else None)
+        df["maximum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[-1] if x else None)
+
+        # Handle cases where distinct_values_count is negative (indicating an approximation).
+        df["distinct_values_count"] = df["distinct_values_count"].apply(lambda x: x if x >= 0 else None)
+
+        result.data_frame = df.drop(columns=["histogram_bounds"])
+
+        return result
+
+    def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves primary key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve primary key information.
+
+        Returns:
+            Response: A response object containing the primary key information.
+        """
+        query = """
+            SELECT
+                tc.table_name,
+                kcu.column_name,
+                kcu.ordinal_position,
+                tc.constraint_name
+            FROM
+                information_schema.table_constraints AS tc
+            JOIN
+                information_schema.key_column_usage AS kcu
+            ON
+                tc.constraint_name = kcu.constraint_name
+            WHERE
+                tc.constraint_type = 'PRIMARY KEY'
+                AND tc.table_schema = current_schema()
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND tc.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_foreign_keys(self, table_names: Optional[list] = None) -> Response:
+        """
+        Retrieves foreign key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve foreign key information.
+
+        Returns:
+            Response: A response object containing the foreign key information.
+        """
+        query = """
+            SELECT
+                ccu.table_name AS parent_table_name,
+                ccu.column_name AS parent_column_name,
+                tc.table_name AS child_table_name,
+                kcu.column_name AS child_column_name,
+                tc.constraint_name
+            FROM
+                information_schema.table_constraints AS tc
+            JOIN
+                information_schema.key_column_usage AS kcu
+            ON
+                tc.constraint_name = kcu.constraint_name
+            JOIN
+                information_schema.constraint_column_usage AS ccu
+            ON
+                ccu.constraint_name = tc.constraint_name
+            WHERE
+                tc.constraint_type = 'FOREIGN KEY'
+                AND tc.table_schema = current_schema()
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names = [f"'{t}'" for t in table_names]
+            query += f" AND tc.table_name IN ({','.join(table_names)})"
+
+        result = self.native_query(query)
+        return result
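
A note on `parse_pg_array_string` above: `pg_stats` columns such as `most_common_vals` and `histogram_bounds` arrive as Postgres array literals (`{1,2,3}`, `{"a, b",c}`), and `csv.reader` is used so that double-quoted elements containing commas survive the split. The helper in isolation (logging dropped for brevity):

    import csv
    import io


    def parse_pg_array_string(x):
        # Strip the outer braces, then split on commas while honouring
        # double-quoted elements that themselves contain commas.
        try:
            return (
                [item.strip(" ,") for row in csv.reader(io.StringIO(x.strip("{}"))) for item in row if item.strip()]
                if x
                else []
            )
        except IndexError:
            return []


    print(parse_pg_array_string("{1,2,3}"))     # ['1', '2', '3']
    print(parse_pg_array_string('{"a, b",c}'))  # ['a, b', 'c']
    print(parse_pg_array_string(None))          # []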
|