MindsDB 25.5.4.2__py3-none-any.whl → 25.6.2.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +28 -25
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
- mindsdb/integrations/libs/api_handler.py +261 -57
- mindsdb/integrations/libs/base.py +100 -29
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +196 -192
- mindsdb/interfaces/agents/constants.py +7 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +220 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +63 -10
- mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +54 -36
- mindsdb/interfaces/skills/sql_agent.py +109 -86
- mindsdb/interfaces/storage/db.py +223 -79
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +9 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +40 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/storage/db.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Dict, List
 import numpy as np
 from sqlalchemy import (
     JSON,
+    BigInteger,
     Boolean,
     Column,
     DateTime,
@@ -16,7 +17,7 @@ from sqlalchemy import (
     UniqueConstraint,
     create_engine,
     text,
-    types
+    types,
 )
 from sqlalchemy.exc import OperationalError
 from sqlalchemy.orm import (
@@ -45,7 +46,7 @@ session, engine = None, None
 def init(connection_str: str = None):
     global Base, session, engine
     if connection_str is None:
-        connection_str = config[
+        connection_str = config["storage_db"]
     base_args = {
         "pool_size": 30,
         "max_overflow": 200,
@@ -144,15 +145,11 @@ class Predictor(Base):
     __tablename__ = "predictor"

     id = Column(Integer, primary_key=True)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     deleted_at = Column(DateTime)
     name = Column(String)
-    data = Column(
-        Json
-    )  # A JSON -- should be everything returned by `get_model_data`, I think
+    data = Column(Json)  # A JSON -- should be everything returned by `get_model_data`, I think
     to_predict = Column(Array)
     company_id = Column(Integer)
     mindsdb_version = Column(String)
@@ -173,9 +170,7 @@ class Predictor(Base):
     code = Column(String, nullable=True)
     lightwood_version = Column(String, nullable=True)
     dtype_dict = Column(Json, nullable=True)
-    project_id = Column(
-        Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False
-    )
+    project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False)
     training_phase_current = Column(Integer)
     training_phase_total = Column(Integer)
     training_phase_name = Column(String)
@@ -199,7 +194,7 @@ Index(
     Predictor.version,
     Predictor.active,
     Predictor.deleted_at,  # would be good to have here nullsfirst(Predictor.deleted_at)
-    unique=True
+    unique=True,
 )


@@ -208,34 +203,27 @@ class Project(Base):

     id = Column(Integer, primary_key=True)
     created_at = Column(DateTime, default=datetime.datetime.now)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     deleted_at = Column(DateTime)
     name = Column(String, nullable=False)
     company_id = Column(Integer, default=0)
     metadata_: dict = Column("metadata", JSON, nullable=True)
-    __table_args__ = (
-        UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
-    )
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),)


 class Integration(Base):
     __tablename__ = "integration"
     id = Column(Integer, primary_key=True)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     name = Column(String, nullable=False)
     engine = Column(String, nullable=False)
     data = Column(Json)
     company_id = Column(Integer)
-    __table_args__ = (
-        UniqueConstraint(
-            "name", "company_id", name="unique_integration_name_company_id"
-        ),
-    )
+
+    meta_tables = relationship("MetaTables", back_populates="integration")
+
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_integration_name_company_id"),)


 class File(Base):
@@ -249,12 +237,8 @@ class File(Base):
     columns = Column(Json, nullable=False)
     created_at = Column(DateTime, default=datetime.datetime.now)
     metadata_: dict = Column("metadata", JSON, nullable=True)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
-    __table_args__ = (
-        UniqueConstraint("name", "company_id", name="unique_file_name_company_id"),
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_file_name_company_id"),)


 class View(Base):
@@ -263,12 +247,8 @@ class View(Base):
     name = Column(String, nullable=False)
     company_id = Column(Integer)
     query = Column(String, nullable=False)
-    project_id = Column(
-        Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False
-    )
-    __table_args__ = (
-        UniqueConstraint("name", "company_id", name="unique_view_name_company_id"),
-    )
+    project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False)
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_view_name_company_id"),)


 class JsonStorage(Base):
@@ -310,9 +290,7 @@ class Jobs(Base):
     schedule_str = Column(String)

     deleted_at = Column(DateTime)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)


@@ -331,9 +309,7 @@ class JobsHistory(Base):
     created_at = Column(DateTime, default=datetime.datetime.now)
     updated_at = Column(DateTime, default=datetime.datetime.now)

-    __table_args__ = (
-        UniqueConstraint("job_id", "start_at", name="uniq_job_history_job_id_start"),
-    )
+    __table_args__ = (UniqueConstraint("job_id", "start_at", name="uniq_job_history_job_id_start"),)


 class ChatBots(Base):
@@ -349,9 +325,7 @@ class ChatBots(Base):
     database_id = Column(Integer)
     params = Column(JSON)

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     webhook_token = Column(String)

@@ -393,9 +367,7 @@ class Triggers(Base):
     query_str = Column(String, nullable=False)
     columns = Column(String)  # list of columns separated by delimiter

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)


@@ -417,9 +389,7 @@ class Tasks(Base):
     run_by = Column(String)
     alive_time = Column(DateTime(timezone=True))

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)


@@ -444,9 +414,7 @@ class Skills(Base):
     params = Column(JSON)

     created_at = Column(DateTime, default=datetime.datetime.now)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     deleted_at = Column(DateTime)

     def as_dict(self) -> Dict:
@@ -475,9 +443,7 @@ class Agents(Base):
     provider = Column(String, nullable=True)
     params = Column(JSON)

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     deleted_at = Column(DateTime)

@@ -520,21 +486,13 @@ class KnowledgeBase(Base):
         doc="fk to the embedding model",
     )

-    embedding_model = relationship(
-        "Predictor", foreign_keys=[embedding_model_id], doc="embedding model"
-    )
+    embedding_model = relationship("Predictor", foreign_keys=[embedding_model_id], doc="embedding model")
     query_id = Column(Integer, nullable=True)

     created_at = Column(DateTime, default=datetime.datetime.now)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)

-    __table_args__ = (
-        UniqueConstraint(
-            "name", "project_id", name="unique_knowledge_base_name_project_id"
-        ),
-    )
+    __table_args__ = (UniqueConstraint("name", "project_id", name="unique_knowledge_base_name_project_id"),)

     def as_dict(self) -> Dict:
         return {
@@ -546,7 +504,7 @@ class KnowledgeBase(Base):
             "vector_database_table": self.vector_database_table,
             "updated_at": self.updated_at,
             "created_at": self.created_at,
-            "params": self.params
+            "params": self.params,
         }


@@ -559,9 +517,7 @@ class QueryContext(Base):
     context_name: str = Column(String, nullable=False)
     values: dict = Column(JSON)

-    updated_at: datetime.datetime = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now)


@@ -581,9 +537,7 @@ class Queries(Base):
     processed_rows = Column(Integer, default=0)
     error: str = Column(String, nullable=True)

-    updated_at: datetime.datetime = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now)


@@ -610,10 +564,11 @@ class LLMLog(Base):


 class LLMData(Base):
-
+    """
     Stores the question/answer pairs of an LLM call so examples can be used
     for self improvement with DSPy
-
+    """
+
     __tablename__ = "llm_data"
     id: int = Column(Integer, primary_key=True)
     input: str = Column(String, nullable=False)
@@ -621,3 +576,192 @@ class LLMData(Base):
     model_id: int = Column(Integer, nullable=False)
     created_at: datetime = Column(DateTime, default=datetime.datetime.now)
     updated_at: datetime = Column(DateTime, onupdate=datetime.datetime.now)
+
+
+# Data Catalog
+class MetaTables(Base):
+    __tablename__ = "meta_tables"
+    id: int = Column(Integer, primary_key=True)
+
+    integration_id: int = Column(Integer, ForeignKey("integration.id"))
+    integration = relationship("Integration", back_populates="meta_tables")
+
+    name: str = Column(String, nullable=False)
+    schema: str = Column(String, nullable=True)
+    description: str = Column(String, nullable=True)
+    type: str = Column(String, nullable=True)
+    row_count: int = Column(BigInteger, nullable=True)
+
+    meta_columns: Mapped[List["MetaColumns"]] = relationship("MetaColumns", back_populates="meta_tables")
+    meta_primary_keys: Mapped[List["MetaPrimaryKeys"]] = relationship("MetaPrimaryKeys", back_populates="meta_tables")
+    meta_foreign_keys_parents: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.parent_table_id", back_populates="parent_table"
+    )
+    meta_foreign_keys_children: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.child_table_id", back_populates="child_table"
+    )
+
+    def as_string(self, indent: int = 0) -> str:
+        pad = " " * indent
+
+        table_info = f"`{self.integration.name}`.`{self.name}` ({self.type})"
+
+        if self.description:
+            table_info += f" : {self.description}"
+
+        if self.schema:
+            table_info += f"\n{pad}Schema: {self.schema}"
+
+        if self.row_count and self.row_count > 0:
+            table_info += f"\n{pad}Estimated Row Count: {self.row_count}"
+
+        if self.meta_primary_keys:
+            table_info += f"\n{pad}Primary Keys (in defined order): {', '.join([pk.as_string() for pk in self.meta_primary_keys])}"
+
+        if self.meta_columns:
+            table_info += f"\n\n{pad}Columns:"
+            for index, column in enumerate(self.meta_columns, start=1):
+                table_info += f"\n{index}. {column.as_string(indent + 4)}\n"
+
+        if self.meta_foreign_keys_children:
+            table_info += f"\n\n{pad}Key Relationships:"
+            for fk in self.meta_foreign_keys_children:
+                table_info += f"\n{pad} {fk.as_string()}"
+
+        return table_info
+
+
+class MetaColumns(Base):
+    __tablename__ = "meta_columns"
+    id: int = Column(Integer, primary_key=True)
+
+    table_id: int = Column(Integer, ForeignKey("meta_tables.id"))
+    meta_tables = relationship("MetaTables", back_populates="meta_columns")
+
+    name: str = Column(String, nullable=False)
+    data_type: str = Column(String, nullable=False)
+    description: str = Column(String, nullable=True)
+    default_value: str = Column(String, nullable=True)
+    is_nullable: bool = Column(Boolean, nullable=True)
+
+    meta_column_statistics: Mapped[List["MetaColumnStatistics"]] = relationship(
+        "MetaColumnStatistics", back_populates="meta_columns"
+    )
+    meta_primary_keys: Mapped[List["MetaPrimaryKeys"]] = relationship("MetaPrimaryKeys", back_populates="meta_columns")
+    meta_foreign_keys_parents: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.parent_column_id", back_populates="parent_column"
+    )
+    meta_foreign_keys_children: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.child_column_id", back_populates="child_column"
+    )
+
+    def as_string(self, indent: int = 0) -> str:
+        pad = " " * indent
+
+        column_info = f"{self.name} ({self.data_type}):"
+        if self.description:
+            column_info += f"\n{pad}Description: {self.description}"
+
+        if self.is_nullable:
+            column_info += f"\n{pad}- Nullable: Yes"
+
+        if self.default_value:
+            column_info += f"\n{pad}- Default Value: {self.default_value}"
+
+        if self.meta_column_statistics:
+            column_info += f"\n\n{pad}- Column Statistics:"
+            column_info += f"\n{self.meta_column_statistics[0].as_string(indent + 4)}"
+
+        return column_info
+
+
+class MetaColumnStatistics(Base):
+    __tablename__ = "meta_column_statistics"
+    column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    meta_columns = relationship("MetaColumns", back_populates="meta_column_statistics")
+
+    most_common_values: str = Column(Array, nullable=True)
+    most_common_frequencies: str = Column(Array, nullable=True)
+    null_percentage: float = Column(Numeric(5, 2), nullable=True)
+    distinct_values_count: int = Column(BigInteger, nullable=True)
+    minimum_value: str = Column(String, nullable=True)
+    maximum_value: str = Column(String, nullable=True)
+
+    def as_string(self, indent: int = 0) -> str:
+        pad = " " * indent
+        inner_pad = " " * (indent + 4)
+
+        column_statistics = ""
+
+        if any(self.most_common_values) and any(self.most_common_frequencies):
+            column_statistics += f"{pad}- Top 10 Most Common Values and Frequencies:"
+            for i in range(min(10, len(self.most_common_values))):
+                freq = self.most_common_frequencies[i]
+                try:
+                    percent = float(freq) * 100
+                    freq_str = f"{percent:.2f}%"
+                except (ValueError, TypeError):
+                    freq_str = str(freq)
+
+                column_statistics += f"\n{inner_pad}- {self.most_common_values[i]}: {freq_str}"
+            column_statistics += "\n"
+
+        if self.null_percentage:
+            column_statistics += f"{pad}- Null Percentage: {self.null_percentage}\n"
+
+        if self.distinct_values_count:
+            column_statistics += f"{pad}- No. of Distinct Values: {self.distinct_values_count}\n"
+
+        if self.minimum_value:
+            column_statistics += f"{pad}- Minimum Value: {self.minimum_value}\n"
+
+        if self.maximum_value:
+            column_statistics += f"{pad}- Maximum Value: {self.maximum_value}"
+
+        return column_statistics
+
+
+class MetaPrimaryKeys(Base):
+    __tablename__ = "meta_primary_keys"
+    table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
+    meta_tables = relationship("MetaTables", back_populates="meta_primary_keys")
+
+    column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    meta_columns = relationship("MetaColumns", back_populates="meta_primary_keys")
+
+    ordinal_position: int = Column(Integer, nullable=True)
+    constraint_name: str = Column(String, nullable=True)
+
+    def as_string(self) -> str:
+        pk_list = sorted(
+            self.meta_tables.meta_primary_keys,
+            key=lambda pk: pk.ordinal_position if pk.ordinal_position is not None else 0,
+        )
+
+        return ", ".join(f"{pk.meta_columns.name} ({pk.meta_columns.data_type})" for pk in pk_list)
+
+
+class MetaForeignKeys(Base):
+    __tablename__ = "meta_foreign_keys"
+    parent_table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
+    parent_table = relationship(
+        "MetaTables", back_populates="meta_foreign_keys_parents", foreign_keys=[parent_table_id]
+    )
+
+    parent_column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    parent_column = relationship(
+        "MetaColumns", back_populates="meta_foreign_keys_parents", foreign_keys=[parent_column_id]
+    )
+
+    child_table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
+    child_table = relationship("MetaTables", back_populates="meta_foreign_keys_children", foreign_keys=[child_table_id])
+
+    child_column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    child_column = relationship(
+        "MetaColumns", back_populates="meta_foreign_keys_children", foreign_keys=[child_column_id]
+    )
+
+    constraint_name: str = Column(String, nullable=True)
+
+    def as_string(self) -> str:
+        return f"{self.child_column.name} in {self.child_table.name} references {self.parent_column.name} in {self.parent_table.name}"
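The new data catalog models are meant to be traversed as a graph: an Integration owns MetaTables rows, each table owns MetaColumns, MetaPrimaryKeys and MetaForeignKeys, and as_string() renders that graph as an LLM-friendly text description. A rough usage sketch follows (not part of the diff; it assumes db.init() can reach a configured MindsDB storage database, and the catalog entries below are invented for illustration):

    from mindsdb.interfaces.storage import db

    db.init()  # binds db.session to the storage DB resolved from config["storage_db"]

    # Hypothetical catalog entries, for illustration only.
    integration = db.Integration(name="my_postgres", engine="postgres", data={}, company_id=0)
    table = db.MetaTables(integration=integration, name="orders", type="BASE TABLE", row_count=12500)
    id_col = db.MetaColumns(meta_tables=table, name="id", data_type="integer", is_nullable=False)
    db.MetaPrimaryKeys(meta_tables=table, meta_columns=id_col, ordinal_position=1)

    # The related MetaTables/MetaColumns/MetaPrimaryKeys rows are picked up via the
    # default save-update cascade on the relationships.
    db.session.add(integration)
    db.session.commit()

    # Renders the table header, columns, primary keys and key relationships as plain text.
    print(table.as_string())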
mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py
ADDED
@@ -0,0 +1,118 @@
+"""added data catalog tables
+
+Revision ID: a44643042fe8
+Revises: 9f150e4f9a05
+Create Date: 2025-05-28 17:20:57.300313
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+from mindsdb.interfaces.storage.db import Array
+
+
+# revision identifiers, used by Alembic.
+revision = "a44643042fe8"
+down_revision = "9f150e4f9a05"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.create_table(
+        "meta_tables",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column(
+            "integration_id",
+            sa.Integer(),
+            sa.ForeignKey("integration.id"),
+            nullable=False,
+        ),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("schema", sa.String(), nullable=True),
+        sa.Column("description", sa.String(), nullable=True),
+        sa.Column("type", sa.String(), nullable=True),
+        sa.Column("row_count", sa.Integer(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_columns",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("data_type", sa.String(), nullable=False),
+        sa.Column("default_value", sa.String(), nullable=True),
+        sa.Column("description", sa.String(), nullable=True),
+        sa.Column("is_nullable", sa.Boolean(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_column_statistics",
+        sa.Column(
+            "column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column("most_common_values", Array(), nullable=True),
+        sa.Column("most_common_frequencies", Array(), nullable=True),
+        sa.Column("null_percentage", sa.Numeric(5, 2), nullable=True),
+        sa.Column("distinct_values_count", sa.Integer(), nullable=True),
+        sa.Column("minimum_value", sa.String(), nullable=True),
+        sa.Column("maximum_value", sa.String(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_primary_keys",
+        sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), primary_key=True),
+        sa.Column(
+            "column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column("ordinal_position", sa.Integer(), nullable=True),
+        sa.Column("constraint_name", sa.String(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_foreign_keys",
+        sa.Column(
+            "parent_table_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_tables.id"),
+            primary_key=True,
+        ),
+        sa.Column(
+            "parent_column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column(
+            "child_table_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_tables.id"),
+            primary_key=True,
+        ),
+        sa.Column(
+            "child_column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column("constraint_name", sa.String(), nullable=True),
+    )
+
+
+def downgrade():
+    op.drop_table("meta_tables")
+
+    op.drop_table("meta_columns")
+
+    op.drop_table("meta_column_statistics")
+
+    op.drop_table("meta_primary_keys")
+
+    op.drop_table("meta_foreign_keys")
mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py
ADDED
@@ -0,0 +1,58 @@
+"""updated data catalog data types
+
+Revision ID: 608e376c19a7
+Revises: a44643042fe8
+Create Date: 2025-06-09 23:20:34.739735
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+
+
+# revision identifiers, used by Alembic.
+revision = "608e376c19a7"
+down_revision = "a44643042fe8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    with op.batch_alter_table("meta_tables", schema=None) as batch_op:
+        batch_op.alter_column(
+            "row_count",
+            type_=sa.BigInteger(),
+            existing_type=sa.Integer(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
+
+    with op.batch_alter_table("meta_column_statistics", schema=None) as batch_op:
+        batch_op.alter_column(
+            "distinct_values_count",
+            type_=sa.BigInteger(),
+            existing_type=sa.Integer(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
+
+
+def downgrade():
+    with op.batch_alter_table("meta_tables", schema=None) as batch_op:
+        batch_op.alter_column(
+            "row_count",
+            type_=sa.Integer(),
+            existing_type=sa.BigInteger(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
+
+    with op.batch_alter_table("meta_column_statistics", schema=None) as batch_op:
+        batch_op.alter_column(
+            "distinct_values_count",
+            type_=sa.Integer(),
+            existing_type=sa.BigInteger(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
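The two revisions above chain off the previous migration head (9f150e4f9a05 -> a44643042fe8 -> 608e376c19a7), first creating the data catalog tables and then widening row_count and distinct_values_count to BigInteger. MindsDB normally applies pending revisions itself when it starts, so the following is only a sketch of driving them with plain Alembic, assuming an alembic.ini/env.py already pointed at the MindsDB storage database:

    from alembic import command
    from alembic.config import Config as AlembicConfig

    alembic_cfg = AlembicConfig("alembic.ini")  # path is an assumption
    command.upgrade(alembic_cfg, "608e376c19a7")  # applies a44643042fe8 first, then the BigInteger widening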
mindsdb/utilities/config.py
CHANGED
@@ -217,6 +217,9 @@ class Config:
                 "project_name": "mindsdb",
                 "enabled": False,
             },
+            "data_catalog": {
+                "enabled": False,
+            },
         }
         # endregion

@@ -360,6 +363,8 @@ class Config:
             self._env_config["default_reranking_model"] = {
                 "api_key": os.environ["MINDSDB_DEFAULT_RERANKING_MODEL_API_KEY"]
             }
+        if os.environ.get("MINDSDB_DATA_CATALOG_ENABLED", "").lower() in ("1", "true"):
+            self._env_config["data_catalog"] = {"enabled": True}

         # region vars: a2a configuration
         a2a_config = {}
@@ -395,11 +400,13 @@ class Config:
             bool: True if config was loaded or updated
         """

-        if self.auto_config_mtime != self.auto_config_path.stat().st_mtime:
+        if self.auto_config_path.is_file() and self.auto_config_mtime != self.auto_config_path.stat().st_mtime:
             try:
                 self._auto_config = json.loads(self.auto_config_path.read_text())
             except json.JSONDecodeError as e:
-                raise ValueError(
+                raise ValueError(
+                    f"The 'auto' configuration file ({self.auto_config_path}) contains invalid JSON: {e}\nFile content: {self.auto_config_path.read_text()}"
+                )
            self.auto_config_mtime = self.auto_config_path.stat().st_mtime
            return True
        return False