MindsDB: mindsdb-25.5.4.2-py3-none-any.whl → mindsdb-25.6.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +50 -26
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/a2a/task_manager.py +68 -6
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +29 -33
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +145 -40
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +352 -83
- mindsdb/integrations/libs/api_handler.py +279 -57
- mindsdb/integrations/libs/base.py +185 -30
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +226 -196
- mindsdb/interfaces/agents/constants.py +8 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +375 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +38 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +222 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +66 -25
- mindsdb/interfaces/knowledge_base/evaluate.py +516 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +31 -36
- mindsdb/interfaces/skills/sql_agent.py +113 -86
- mindsdb/interfaces/storage/db.py +242 -82
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +13 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +40 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +257 -257
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +76 -68
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/storage/db.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import datetime
|
|
3
|
-
from typing import Dict, List
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from sqlalchemy import (
|
|
7
7
|
JSON,
|
|
8
|
+
BigInteger,
|
|
8
9
|
Boolean,
|
|
9
10
|
Column,
|
|
10
11
|
DateTime,
|
|
@@ -16,7 +17,7 @@ from sqlalchemy import (
|
|
|
16
17
|
UniqueConstraint,
|
|
17
18
|
create_engine,
|
|
18
19
|
text,
|
|
19
|
-
types
|
|
20
|
+
types,
|
|
20
21
|
)
|
|
21
22
|
from sqlalchemy.exc import OperationalError
|
|
22
23
|
from sqlalchemy.orm import (
|
|
@@ -45,7 +46,7 @@ session, engine = None, None
|
|
|
45
46
|
def init(connection_str: str = None):
|
|
46
47
|
global Base, session, engine
|
|
47
48
|
if connection_str is None:
|
|
48
|
-
connection_str = config[
|
|
49
|
+
connection_str = config["storage_db"]
|
|
49
50
|
base_args = {
|
|
50
51
|
"pool_size": 30,
|
|
51
52
|
"max_overflow": 200,
|
|
@@ -144,15 +145,11 @@ class Predictor(Base):
|
|
|
144
145
|
__tablename__ = "predictor"
|
|
145
146
|
|
|
146
147
|
id = Column(Integer, primary_key=True)
|
|
147
|
-
updated_at = Column(
|
|
148
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
149
|
-
)
|
|
148
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
150
149
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
151
150
|
deleted_at = Column(DateTime)
|
|
152
151
|
name = Column(String)
|
|
153
|
-
data = Column(
|
|
154
|
-
Json
|
|
155
|
-
) # A JSON -- should be everything returned by `get_model_data`, I think
|
|
152
|
+
data = Column(Json) # A JSON -- should be everything returned by `get_model_data`, I think
|
|
156
153
|
to_predict = Column(Array)
|
|
157
154
|
company_id = Column(Integer)
|
|
158
155
|
mindsdb_version = Column(String)
|
|
@@ -173,9 +170,7 @@ class Predictor(Base):
|
|
|
173
170
|
code = Column(String, nullable=True)
|
|
174
171
|
lightwood_version = Column(String, nullable=True)
|
|
175
172
|
dtype_dict = Column(Json, nullable=True)
|
|
176
|
-
project_id = Column(
|
|
177
|
-
Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False
|
|
178
|
-
)
|
|
173
|
+
project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False)
|
|
179
174
|
training_phase_current = Column(Integer)
|
|
180
175
|
training_phase_total = Column(Integer)
|
|
181
176
|
training_phase_name = Column(String)
|
|
@@ -199,7 +194,7 @@ Index(
|
|
|
199
194
|
Predictor.version,
|
|
200
195
|
Predictor.active,
|
|
201
196
|
Predictor.deleted_at, # would be good to have here nullsfirst(Predictor.deleted_at)
|
|
202
|
-
unique=True
|
|
197
|
+
unique=True,
|
|
203
198
|
)
|
|
204
199
|
|
|
205
200
|
|
|
@@ -208,34 +203,27 @@ class Project(Base):
|
|
|
208
203
|
|
|
209
204
|
id = Column(Integer, primary_key=True)
|
|
210
205
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
211
|
-
updated_at = Column(
|
|
212
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
213
|
-
)
|
|
206
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
214
207
|
deleted_at = Column(DateTime)
|
|
215
208
|
name = Column(String, nullable=False)
|
|
216
209
|
company_id = Column(Integer, default=0)
|
|
217
210
|
metadata_: dict = Column("metadata", JSON, nullable=True)
|
|
218
|
-
__table_args__ = (
|
|
219
|
-
UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
|
|
220
|
-
)
|
|
211
|
+
__table_args__ = (UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),)
|
|
221
212
|
|
|
222
213
|
|
|
223
214
|
class Integration(Base):
|
|
224
215
|
__tablename__ = "integration"
|
|
225
216
|
id = Column(Integer, primary_key=True)
|
|
226
|
-
updated_at = Column(
|
|
227
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
228
|
-
)
|
|
217
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
229
218
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
230
219
|
name = Column(String, nullable=False)
|
|
231
220
|
engine = Column(String, nullable=False)
|
|
232
221
|
data = Column(Json)
|
|
233
222
|
company_id = Column(Integer)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
)
|
|
223
|
+
|
|
224
|
+
meta_tables = relationship("MetaTables", back_populates="integration")
|
|
225
|
+
|
|
226
|
+
__table_args__ = (UniqueConstraint("name", "company_id", name="unique_integration_name_company_id"),)
|
|
239
227
|
|
|
240
228
|
|
|
241
229
|
class File(Base):
|
|
@@ -249,12 +237,8 @@ class File(Base):
|
|
|
249
237
|
columns = Column(Json, nullable=False)
|
|
250
238
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
251
239
|
metadata_: dict = Column("metadata", JSON, nullable=True)
|
|
252
|
-
updated_at = Column(
|
|
253
|
-
|
|
254
|
-
)
|
|
255
|
-
__table_args__ = (
|
|
256
|
-
UniqueConstraint("name", "company_id", name="unique_file_name_company_id"),
|
|
257
|
-
)
|
|
240
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
241
|
+
__table_args__ = (UniqueConstraint("name", "company_id", name="unique_file_name_company_id"),)
|
|
258
242
|
|
|
259
243
|
|
|
260
244
|
class View(Base):
|
|
@@ -263,12 +247,8 @@ class View(Base):
|
|
|
263
247
|
name = Column(String, nullable=False)
|
|
264
248
|
company_id = Column(Integer)
|
|
265
249
|
query = Column(String, nullable=False)
|
|
266
|
-
project_id = Column(
|
|
267
|
-
|
|
268
|
-
)
|
|
269
|
-
__table_args__ = (
|
|
270
|
-
UniqueConstraint("name", "company_id", name="unique_view_name_company_id"),
|
|
271
|
-
)
|
|
250
|
+
project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False)
|
|
251
|
+
__table_args__ = (UniqueConstraint("name", "company_id", name="unique_view_name_company_id"),)
|
|
272
252
|
|
|
273
253
|
|
|
274
254
|
class JsonStorage(Base):
|
|
@@ -310,9 +290,7 @@ class Jobs(Base):
|
|
|
310
290
|
schedule_str = Column(String)
|
|
311
291
|
|
|
312
292
|
deleted_at = Column(DateTime)
|
|
313
|
-
updated_at = Column(
|
|
314
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
315
|
-
)
|
|
293
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
316
294
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
317
295
|
|
|
318
296
|
|
|
@@ -331,9 +309,7 @@ class JobsHistory(Base):
|
|
|
331
309
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
332
310
|
updated_at = Column(DateTime, default=datetime.datetime.now)
|
|
333
311
|
|
|
334
|
-
__table_args__ = (
|
|
335
|
-
UniqueConstraint("job_id", "start_at", name="uniq_job_history_job_id_start"),
|
|
336
|
-
)
|
|
312
|
+
__table_args__ = (UniqueConstraint("job_id", "start_at", name="uniq_job_history_job_id_start"),)
|
|
337
313
|
|
|
338
314
|
|
|
339
315
|
class ChatBots(Base):
|
|
@@ -349,9 +325,7 @@ class ChatBots(Base):
|
|
|
349
325
|
database_id = Column(Integer)
|
|
350
326
|
params = Column(JSON)
|
|
351
327
|
|
|
352
|
-
updated_at = Column(
|
|
353
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
354
|
-
)
|
|
328
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
355
329
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
356
330
|
webhook_token = Column(String)
|
|
357
331
|
|
|
@@ -393,9 +367,7 @@ class Triggers(Base):
|
|
|
393
367
|
query_str = Column(String, nullable=False)
|
|
394
368
|
columns = Column(String) # list of columns separated by delimiter
|
|
395
369
|
|
|
396
|
-
updated_at = Column(
|
|
397
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
398
|
-
)
|
|
370
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
399
371
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
400
372
|
|
|
401
373
|
|
|
@@ -417,9 +389,7 @@ class Tasks(Base):
|
|
|
417
389
|
run_by = Column(String)
|
|
418
390
|
alive_time = Column(DateTime(timezone=True))
|
|
419
391
|
|
|
420
|
-
updated_at = Column(
|
|
421
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
422
|
-
)
|
|
392
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
423
393
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
424
394
|
|
|
425
395
|
|
|
@@ -444,9 +414,7 @@ class Skills(Base):
|
|
|
444
414
|
params = Column(JSON)
|
|
445
415
|
|
|
446
416
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
447
|
-
updated_at = Column(
|
|
448
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
449
|
-
)
|
|
417
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
450
418
|
deleted_at = Column(DateTime)
|
|
451
419
|
|
|
452
420
|
def as_dict(self) -> Dict:
|
|
@@ -475,9 +443,7 @@ class Agents(Base):
|
|
|
475
443
|
provider = Column(String, nullable=True)
|
|
476
444
|
params = Column(JSON)
|
|
477
445
|
|
|
478
|
-
updated_at = Column(
|
|
479
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
480
|
-
)
|
|
446
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
481
447
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
482
448
|
deleted_at = Column(DateTime)
|
|
483
449
|
|
|
@@ -520,33 +486,41 @@ class KnowledgeBase(Base):
|
|
|
520
486
|
doc="fk to the embedding model",
|
|
521
487
|
)
|
|
522
488
|
|
|
523
|
-
embedding_model = relationship(
|
|
524
|
-
"Predictor", foreign_keys=[embedding_model_id], doc="embedding model"
|
|
525
|
-
)
|
|
489
|
+
embedding_model = relationship("Predictor", foreign_keys=[embedding_model_id], doc="embedding model")
|
|
526
490
|
query_id = Column(Integer, nullable=True)
|
|
527
491
|
|
|
528
492
|
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
529
|
-
updated_at = Column(
|
|
530
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
531
|
-
)
|
|
493
|
+
updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
532
494
|
|
|
533
|
-
__table_args__ = (
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
)
|
|
537
|
-
|
|
495
|
+
__table_args__ = (UniqueConstraint("name", "project_id", name="unique_knowledge_base_name_project_id"),)
|
|
496
|
+
|
|
497
|
+
def as_dict(self, with_secrets: Optional[bool] = True) -> Dict:
    """Serialize the knowledge base into a plain dictionary.

    The embedding/reranking model settings and the column settings are
    lifted out of ``params`` into top-level keys; whatever is left is
    returned under ``"params"``.

    Args:
        with_secrets: When falsy, an ``api_key`` found in the embedding or
            reranking model settings is replaced by ``"******"`` in the
            returned dictionary.

    Returns:
        Dictionary describing this knowledge base.
    """
    # Shallow copy so the pops below never remove keys from the stored params.
    params = dict(self.params or {})
    embedding_model = params.pop("embedding_model", None)
    reranking_model = params.pop("reranking_model", None)

    if not with_secrets:
        # The nested dicts are still shared with self.params, so mask a copy;
        # assigning in place would overwrite the real key on the instance.
        if isinstance(embedding_model, dict) and "api_key" in embedding_model:
            embedding_model = {**embedding_model, "api_key": "******"}

        if isinstance(reranking_model, dict) and "api_key" in reranking_model:
            reranking_model = {**reranking_model, "api_key": "******"}

    return {
        "id": self.id,
        "name": self.name,
        "project_id": self.project_id,
        "vector_database": None if self.vector_database is None else self.vector_database.name,
        "vector_database_table": self.vector_database_table,
        "updated_at": self.updated_at,
        "created_at": self.created_at,
        "query_id": self.query_id,
        "embedding_model": embedding_model,
        "reranking_model": reranking_model,
        # These pops run before "params" is read, so the leftover dict
        # no longer contains the column settings.
        "metadata_columns": params.pop("metadata_columns", None),
        "content_columns": params.pop("content_columns", None),
        "id_column": params.pop("id_column", None),
        "params": params,
    }
|
|
551
525
|
|
|
552
526
|
|
|
@@ -559,9 +533,7 @@ class QueryContext(Base):
|
|
|
559
533
|
context_name: str = Column(String, nullable=False)
|
|
560
534
|
values: dict = Column(JSON)
|
|
561
535
|
|
|
562
|
-
updated_at: datetime.datetime = Column(
|
|
563
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
564
|
-
)
|
|
536
|
+
updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
565
537
|
created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now)
|
|
566
538
|
|
|
567
539
|
|
|
@@ -581,9 +553,7 @@ class Queries(Base):
|
|
|
581
553
|
processed_rows = Column(Integer, default=0)
|
|
582
554
|
error: str = Column(String, nullable=True)
|
|
583
555
|
|
|
584
|
-
updated_at: datetime.datetime = Column(
|
|
585
|
-
DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
|
|
586
|
-
)
|
|
556
|
+
updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
|
|
587
557
|
created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now)
|
|
588
558
|
|
|
589
559
|
|
|
@@ -610,10 +580,11 @@ class LLMLog(Base):
|
|
|
610
580
|
|
|
611
581
|
|
|
612
582
|
class LLMData(Base):
|
|
613
|
-
|
|
583
|
+
"""
|
|
614
584
|
Stores the question/answer pairs of an LLM call so examples can be used
|
|
615
585
|
for self improvement with DSPy
|
|
616
|
-
|
|
586
|
+
"""
|
|
587
|
+
|
|
617
588
|
__tablename__ = "llm_data"
|
|
618
589
|
id: int = Column(Integer, primary_key=True)
|
|
619
590
|
input: str = Column(String, nullable=False)
|
|
@@ -621,3 +592,192 @@ class LLMData(Base):
|
|
|
621
592
|
model_id: int = Column(Integer, nullable=False)
|
|
622
593
|
created_at: datetime = Column(DateTime, default=datetime.datetime.now)
|
|
623
594
|
updated_at: datetime = Column(DateTime, onupdate=datetime.datetime.now)
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
# Data Catalog
|
|
598
|
+
class MetaTables(Base):
    """Data-catalog record describing one table of an integration."""

    __tablename__ = "meta_tables"
    id: int = Column(Integer, primary_key=True)

    # Owning integration.
    integration_id: int = Column(Integer, ForeignKey("integration.id"))
    integration = relationship("Integration", back_populates="meta_tables")

    name: str = Column(String, nullable=False)
    schema: str = Column(String, nullable=True)
    description: str = Column(String, nullable=True)
    type: str = Column(String, nullable=True)
    row_count: int = Column(BigInteger, nullable=True)

    meta_columns: Mapped[List["MetaColumns"]] = relationship("MetaColumns", back_populates="meta_tables")
    meta_primary_keys: Mapped[List["MetaPrimaryKeys"]] = relationship("MetaPrimaryKeys", back_populates="meta_tables")
    # Foreign keys in which this table is the referenced (parent) side.
    meta_foreign_keys_parents: Mapped[List["MetaForeignKeys"]] = relationship(
        "MetaForeignKeys", foreign_keys="MetaForeignKeys.parent_table_id", back_populates="parent_table"
    )
    # Foreign keys in which this table is the referencing (child) side.
    meta_foreign_keys_children: Mapped[List["MetaForeignKeys"]] = relationship(
        "MetaForeignKeys", foreign_keys="MetaForeignKeys.child_table_id", back_populates="child_table"
    )

    def as_string(self, indent: int = 0) -> str:
        """Render the table, its keys, columns and relationships as text.

        Args:
            indent: Number of spaces used to pad the nested lines.

        Returns:
            Multi-line, human-readable description of the table.
        """
        pad = " " * indent

        table_info = f"`{self.integration.name}`.`{self.name}` ({self.type})"

        if self.description:
            table_info += f" : {self.description}"

        if self.schema:
            table_info += f"\n{pad}Schema: {self.schema}"

        if self.row_count and self.row_count > 0:
            table_info += f"\n{pad}Estimated Row Count: {self.row_count}"

        if self.meta_primary_keys:
            # List every key column exactly once, in ordinal order. Calling
            # as_string() on each key would repeat the whole list per key,
            # because that method already renders all keys of the table.
            ordered_keys = sorted(
                self.meta_primary_keys,
                key=lambda pk: pk.ordinal_position if pk.ordinal_position is not None else 0,
            )
            key_list = ", ".join(f"{pk.meta_columns.name} ({pk.meta_columns.data_type})" for pk in ordered_keys)
            table_info += f"\n{pad}Primary Keys (in defined order): {key_list}"

        if self.meta_columns:
            table_info += f"\n\n{pad}Columns:"
            for index, column in enumerate(self.meta_columns, start=1):
                table_info += f"\n{index}. {column.as_string(indent + 4)}\n"

        if self.meta_foreign_keys_children:
            table_info += f"\n\n{pad}Key Relationships:"
            for fk in self.meta_foreign_keys_children:
                table_info += f"\n{pad} {fk.as_string()}"

        return table_info
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
class MetaColumns(Base):
    """Data-catalog record describing one column of a catalogued table."""

    __tablename__ = "meta_columns"
    id: int = Column(Integer, primary_key=True)

    # Table this column belongs to.
    table_id: int = Column(Integer, ForeignKey("meta_tables.id"))
    meta_tables = relationship("MetaTables", back_populates="meta_columns")

    name: str = Column(String, nullable=False)
    data_type: str = Column(String, nullable=False)
    description: str = Column(String, nullable=True)
    default_value: str = Column(String, nullable=True)
    is_nullable: bool = Column(Boolean, nullable=True)

    meta_column_statistics: Mapped[List["MetaColumnStatistics"]] = relationship(
        "MetaColumnStatistics",
        back_populates="meta_columns",
    )
    meta_primary_keys: Mapped[List["MetaPrimaryKeys"]] = relationship(
        "MetaPrimaryKeys",
        back_populates="meta_columns",
    )
    # Foreign keys in which this column is the referenced (parent) side.
    meta_foreign_keys_parents: Mapped[List["MetaForeignKeys"]] = relationship(
        "MetaForeignKeys",
        foreign_keys="MetaForeignKeys.parent_column_id",
        back_populates="parent_column",
    )
    # Foreign keys in which this column is the referencing (child) side.
    meta_foreign_keys_children: Mapped[List["MetaForeignKeys"]] = relationship(
        "MetaForeignKeys",
        foreign_keys="MetaForeignKeys.child_column_id",
        back_populates="child_column",
    )

    def as_string(self, indent: int = 0) -> str:
        """Render the column and, when present, its first statistics row.

        Args:
            indent: Number of spaces used to pad the nested lines.

        Returns:
            Multi-line, human-readable description of the column.
        """
        pad = " " * indent
        fragments = [f"{self.name} ({self.data_type}):"]

        if self.description:
            fragments.append(f"\n{pad}Description: {self.description}")

        if self.is_nullable:
            fragments.append(f"\n{pad}- Nullable: Yes")

        if self.default_value:
            fragments.append(f"\n{pad}- Default Value: {self.default_value}")

        if self.meta_column_statistics:
            # Only the first statistics row is rendered.
            first_stats = self.meta_column_statistics[0]
            fragments.append(f"\n\n{pad}- Column Statistics:")
            fragments.append(f"\n{first_stats.as_string(indent + 4)}")

        return "".join(fragments)
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
class MetaColumnStatistics(Base):
    """Data-catalog statistics attached to a single catalogued column."""

    __tablename__ = "meta_column_statistics"
    column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
    meta_columns = relationship("MetaColumns", back_populates="meta_column_statistics")

    most_common_values: str = Column(Array, nullable=True)
    most_common_frequencies: str = Column(Array, nullable=True)
    null_percentage: float = Column(Numeric(5, 2), nullable=True)
    distinct_values_count: int = Column(BigInteger, nullable=True)
    minimum_value: str = Column(String, nullable=True)
    maximum_value: str = Column(String, nullable=True)

    def as_string(self, indent: int = 0) -> str:
        """Render the available statistics as a bulleted text block.

        Statistics whose value is missing or falsy are omitted.

        Args:
            indent: Number of spaces used to pad each bullet.

        Returns:
            The rendered statistics; an empty string when nothing is set.
        """
        pad = " " * indent
        inner_pad = " " * (indent + 4)

        # Both array columns are nullable: treat a missing list as empty
        # instead of passing None to any(), which raises TypeError.
        values = self.most_common_values or []
        frequencies = self.most_common_frequencies or []

        column_statistics = ""

        if any(values) and any(frequencies):
            column_statistics += f"{pad}- Top 10 Most Common Values and Frequencies:"
            # zip() stops at the shorter list, so a length mismatch between
            # values and frequencies cannot raise IndexError.
            for value, freq in zip(values[:10], frequencies):
                try:
                    percent = float(freq) * 100
                    freq_str = f"{percent:.2f}%"
                except (ValueError, TypeError):
                    # Not numeric: show the stored frequency unchanged.
                    freq_str = str(freq)

                column_statistics += f"\n{inner_pad}- {value}: {freq_str}"
            column_statistics += "\n"

        if self.null_percentage:
            column_statistics += f"{pad}- Null Percentage: {self.null_percentage}\n"

        if self.distinct_values_count:
            column_statistics += f"{pad}- No. of Distinct Values: {self.distinct_values_count}\n"

        if self.minimum_value:
            column_statistics += f"{pad}- Minimum Value: {self.minimum_value}\n"

        if self.maximum_value:
            column_statistics += f"{pad}- Maximum Value: {self.maximum_value}"

        return column_statistics
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
class MetaPrimaryKeys(Base):
    """Data-catalog record marking a column as part of a table's primary key."""

    __tablename__ = "meta_primary_keys"

    # The (table, column) pair forms the composite primary key of this record.
    table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
    meta_tables = relationship("MetaTables", back_populates="meta_primary_keys")

    column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
    meta_columns = relationship("MetaColumns", back_populates="meta_primary_keys")

    ordinal_position: int = Column(Integer, nullable=True)
    constraint_name: str = Column(String, nullable=True)

    def as_string(self) -> str:
        """Render every primary-key column of the owning table.

        The result covers all keys of the table, not only this record,
        ordered by ordinal position (a missing position sorts as 0).

        Returns:
            Comma-separated ``name (data_type)`` entries.
        """

        def position(pk):
            return 0 if pk.ordinal_position is None else pk.ordinal_position

        ordered = list(self.meta_tables.meta_primary_keys)
        ordered.sort(key=position)

        rendered = [f"{pk.meta_columns.name} ({pk.meta_columns.data_type})" for pk in ordered]
        return ", ".join(rendered)
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
class MetaForeignKeys(Base):
    """Data-catalog record linking a child column to the parent column it references."""

    __tablename__ = "meta_foreign_keys"

    # Referenced (parent) side.
    parent_table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
    parent_table = relationship(
        "MetaTables",
        back_populates="meta_foreign_keys_parents",
        foreign_keys=[parent_table_id],
    )

    parent_column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
    parent_column = relationship(
        "MetaColumns",
        back_populates="meta_foreign_keys_parents",
        foreign_keys=[parent_column_id],
    )

    # Referencing (child) side.
    child_table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
    child_table = relationship(
        "MetaTables",
        back_populates="meta_foreign_keys_children",
        foreign_keys=[child_table_id],
    )

    child_column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
    child_column = relationship(
        "MetaColumns",
        back_populates="meta_foreign_keys_children",
        foreign_keys=[child_column_id],
    )

    constraint_name: str = Column(String, nullable=True)

    def as_string(self) -> str:
        """Describe the relationship as '<child> in <table> references <parent> in <table>'."""
        child_side = f"{self.child_column.name} in {self.child_table.name}"
        parent_side = f"{self.parent_column.name} in {self.parent_table.name}"
        return f"{child_side} references {parent_side}"
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""added data catalog tables
|
|
2
|
+
|
|
3
|
+
Revision ID: a44643042fe8
|
|
4
|
+
Revises: 9f150e4f9a05
|
|
5
|
+
Create Date: 2025-05-28 17:20:57.300313
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from alembic import op
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
import mindsdb.interfaces.storage.db # noqa
|
|
12
|
+
from mindsdb.interfaces.storage.db import Array
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# revision identifiers, used by Alembic.
|
|
16
|
+
revision = "a44643042fe8"
|
|
17
|
+
down_revision = "9f150e4f9a05"
|
|
18
|
+
branch_labels = None
|
|
19
|
+
depends_on = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def upgrade():
    """Create the five data-catalog tables, each after the tables it references."""

    def key_column(name, target):
        # Integer column that is a foreign key to `target` and part of the primary key.
        return sa.Column(name, sa.Integer(), sa.ForeignKey(target), primary_key=True)

    op.create_table(
        "meta_tables",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("integration_id", sa.Integer(), sa.ForeignKey("integration.id"), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("schema", sa.String(), nullable=True),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("type", sa.String(), nullable=True),
        sa.Column("row_count", sa.Integer(), nullable=True),
    )

    op.create_table(
        "meta_columns",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column(
            "table_id",
            sa.Integer(),
            sa.ForeignKey("meta_tables.id"),
            nullable=False,
        ),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("data_type", sa.String(), nullable=False),
        sa.Column("default_value", sa.String(), nullable=True),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("is_nullable", sa.Boolean(), nullable=True),
    )

    op.create_table(
        "meta_column_statistics",
        key_column("column_id", "meta_columns.id"),
        sa.Column("most_common_values", Array(), nullable=True),
        sa.Column("most_common_frequencies", Array(), nullable=True),
        sa.Column("null_percentage", sa.Numeric(5, 2), nullable=True),
        sa.Column("distinct_values_count", sa.Integer(), nullable=True),
        sa.Column("minimum_value", sa.String(), nullable=True),
        sa.Column("maximum_value", sa.String(), nullable=True),
    )

    op.create_table(
        "meta_primary_keys",
        key_column("table_id", "meta_tables.id"),
        key_column("column_id", "meta_columns.id"),
        sa.Column("ordinal_position", sa.Integer(), nullable=True),
        sa.Column("constraint_name", sa.String(), nullable=True),
    )

    op.create_table(
        "meta_foreign_keys",
        key_column("parent_table_id", "meta_tables.id"),
        key_column("parent_column_id", "meta_columns.id"),
        key_column("child_table_id", "meta_tables.id"),
        key_column("child_column_id", "meta_columns.id"),
        sa.Column("constraint_name", sa.String(), nullable=True),
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def downgrade():
    """Drop the data-catalog tables created by upgrade().

    Tables are dropped in the reverse of their creation order, so every
    table holding a foreign key is removed before the table it references.
    Dropping "meta_tables" first fails on databases that enforce foreign
    keys, because the other four tables still depend on it.
    """
    op.drop_table("meta_foreign_keys")

    op.drop_table("meta_primary_keys")

    op.drop_table("meta_column_statistics")

    op.drop_table("meta_columns")

    op.drop_table("meta_tables")
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""updated data catalog data types
|
|
2
|
+
|
|
3
|
+
Revision ID: 608e376c19a7
|
|
4
|
+
Revises: a44643042fe8
|
|
5
|
+
Create Date: 2025-06-09 23:20:34.739735
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from alembic import op
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
import mindsdb.interfaces.storage.db # noqa
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# revision identifiers, used by Alembic.
|
|
15
|
+
revision = "608e376c19a7"
|
|
16
|
+
down_revision = "a44643042fe8"
|
|
17
|
+
branch_labels = None
|
|
18
|
+
depends_on = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def upgrade():
    """Widen the two data-catalog counter columns from Integer to BigInteger."""
    widened_columns = (
        ("meta_tables", "row_count"),
        ("meta_column_statistics", "distinct_values_count"),
    )

    for table_name, column_name in widened_columns:
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.alter_column(
                column_name,
                type_=sa.BigInteger(),
                existing_type=sa.Integer(),
                existing_nullable=True,
                existing_server_default=None,
            )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def downgrade():
    """Narrow the two data-catalog counter columns back from BigInteger to Integer."""
    narrowed_columns = (
        ("meta_tables", "row_count"),
        ("meta_column_statistics", "distinct_values_count"),
    )

    for table_name, column_name in narrowed_columns:
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.alter_column(
                column_name,
                type_=sa.Integer(),
                existing_type=sa.BigInteger(),
                existing_nullable=True,
                existing_server_default=None,
            )
|