MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB has been flagged as possibly problematic.

Files changed (76)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +50 -26
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/a2a/task_manager.py +68 -6
  5. mindsdb/api/executor/command_executor.py +69 -14
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  7. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
  8. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  9. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  10. mindsdb/api/executor/planner/plan_join.py +67 -77
  11. mindsdb/api/executor/planner/query_planner.py +176 -155
  12. mindsdb/api/executor/planner/steps.py +37 -12
  13. mindsdb/api/executor/sql_query/result_set.py +45 -64
  14. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  15. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  16. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  18. mindsdb/api/executor/utilities/sql.py +42 -48
  19. mindsdb/api/http/namespaces/config.py +1 -1
  20. mindsdb/api/http/namespaces/file.py +14 -23
  21. mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
  22. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  23. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  24. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  25. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  26. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  27. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
  28. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  29. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  30. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  31. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  32. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +29 -33
  33. mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
  34. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  35. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  36. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +145 -40
  37. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  38. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +352 -83
  39. mindsdb/integrations/libs/api_handler.py +279 -57
  40. mindsdb/integrations/libs/base.py +185 -30
  41. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  42. mindsdb/integrations/utilities/handler_utils.py +23 -8
  43. mindsdb/integrations/utilities/sql_utils.py +35 -40
  44. mindsdb/interfaces/agents/agents_controller.py +226 -196
  45. mindsdb/interfaces/agents/constants.py +8 -1
  46. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  47. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  48. mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
  49. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  50. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  51. mindsdb/interfaces/data_catalog/data_catalog_loader.py +375 -0
  52. mindsdb/interfaces/data_catalog/data_catalog_reader.py +38 -0
  53. mindsdb/interfaces/database/database.py +81 -57
  54. mindsdb/interfaces/database/integrations.py +222 -234
  55. mindsdb/interfaces/database/log.py +72 -104
  56. mindsdb/interfaces/database/projects.py +156 -193
  57. mindsdb/interfaces/file/file_controller.py +21 -65
  58. mindsdb/interfaces/knowledge_base/controller.py +66 -25
  59. mindsdb/interfaces/knowledge_base/evaluate.py +516 -0
  60. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  61. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  62. mindsdb/interfaces/skills/skills_controller.py +31 -36
  63. mindsdb/interfaces/skills/sql_agent.py +113 -86
  64. mindsdb/interfaces/storage/db.py +242 -82
  65. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  66. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  67. mindsdb/utilities/config.py +13 -2
  68. mindsdb/utilities/log.py +35 -26
  69. mindsdb/utilities/ml_task_queue/task.py +19 -22
  70. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  71. mindsdb/utilities/starters.py +40 -0
  72. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +257 -257
  73. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +76 -68
  74. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
  75. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
  76. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/storage/db.py

@@ -1,10 +1,11 @@
 import json
 import datetime
-from typing import Dict, List
+from typing import Dict, List, Optional

 import numpy as np
 from sqlalchemy import (
     JSON,
+    BigInteger,
     Boolean,
     Column,
     DateTime,
@@ -16,7 +17,7 @@ from sqlalchemy import (
     UniqueConstraint,
     create_engine,
     text,
-    types
+    types,
 )
 from sqlalchemy.exc import OperationalError
 from sqlalchemy.orm import (
@@ -45,7 +46,7 @@ session, engine = None, None
 def init(connection_str: str = None):
     global Base, session, engine
     if connection_str is None:
-        connection_str = config['storage_db']
+        connection_str = config["storage_db"]
     base_args = {
         "pool_size": 30,
         "max_overflow": 200,
@@ -144,15 +145,11 @@ class Predictor(Base):
     __tablename__ = "predictor"

     id = Column(Integer, primary_key=True)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     deleted_at = Column(DateTime)
     name = Column(String)
-    data = Column(
-        Json
-    )  # A JSON -- should be everything returned by `get_model_data`, I think
+    data = Column(Json)  # A JSON -- should be everything returned by `get_model_data`, I think
     to_predict = Column(Array)
     company_id = Column(Integer)
     mindsdb_version = Column(String)
@@ -173,9 +170,7 @@ class Predictor(Base):
     code = Column(String, nullable=True)
     lightwood_version = Column(String, nullable=True)
     dtype_dict = Column(Json, nullable=True)
-    project_id = Column(
-        Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False
-    )
+    project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False)
     training_phase_current = Column(Integer)
     training_phase_total = Column(Integer)
     training_phase_name = Column(String)
@@ -199,7 +194,7 @@ Index(
     Predictor.version,
     Predictor.active,
     Predictor.deleted_at,  # would be good to have here nullsfirst(Predictor.deleted_at)
-    unique=True
+    unique=True,
 )


@@ -208,34 +203,27 @@ class Project(Base):

     id = Column(Integer, primary_key=True)
     created_at = Column(DateTime, default=datetime.datetime.now)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     deleted_at = Column(DateTime)
     name = Column(String, nullable=False)
     company_id = Column(Integer, default=0)
     metadata_: dict = Column("metadata", JSON, nullable=True)
-    __table_args__ = (
-        UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
-    )
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),)


 class Integration(Base):
     __tablename__ = "integration"
     id = Column(Integer, primary_key=True)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     name = Column(String, nullable=False)
     engine = Column(String, nullable=False)
     data = Column(Json)
     company_id = Column(Integer)
-    __table_args__ = (
-        UniqueConstraint(
-            "name", "company_id", name="unique_integration_name_company_id"
-        ),
-    )
+
+    meta_tables = relationship("MetaTables", back_populates="integration")
+
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_integration_name_company_id"),)


 class File(Base):
@@ -249,12 +237,8 @@ class File(Base):
     columns = Column(Json, nullable=False)
     created_at = Column(DateTime, default=datetime.datetime.now)
     metadata_: dict = Column("metadata", JSON, nullable=True)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
-    __table_args__ = (
-        UniqueConstraint("name", "company_id", name="unique_file_name_company_id"),
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_file_name_company_id"),)


 class View(Base):
@@ -263,12 +247,8 @@ class View(Base):
     name = Column(String, nullable=False)
     company_id = Column(Integer)
     query = Column(String, nullable=False)
-    project_id = Column(
-        Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False
-    )
-    __table_args__ = (
-        UniqueConstraint("name", "company_id", name="unique_view_name_company_id"),
-    )
+    project_id = Column(Integer, ForeignKey("project.id", name="fk_project_id"), nullable=False)
+    __table_args__ = (UniqueConstraint("name", "company_id", name="unique_view_name_company_id"),)


 class JsonStorage(Base):
@@ -310,9 +290,7 @@ class Jobs(Base):
     schedule_str = Column(String)

     deleted_at = Column(DateTime)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)


@@ -331,9 +309,7 @@ class JobsHistory(Base):
     created_at = Column(DateTime, default=datetime.datetime.now)
     updated_at = Column(DateTime, default=datetime.datetime.now)

-    __table_args__ = (
-        UniqueConstraint("job_id", "start_at", name="uniq_job_history_job_id_start"),
-    )
+    __table_args__ = (UniqueConstraint("job_id", "start_at", name="uniq_job_history_job_id_start"),)


 class ChatBots(Base):
@@ -349,9 +325,7 @@ class ChatBots(Base):
     database_id = Column(Integer)
     params = Column(JSON)

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     webhook_token = Column(String)

@@ -393,9 +367,7 @@ class Triggers(Base):
     query_str = Column(String, nullable=False)
     columns = Column(String)  # list of columns separated by delimiter

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)


@@ -417,9 +389,7 @@ class Tasks(Base):
     run_by = Column(String)
     alive_time = Column(DateTime(timezone=True))

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)


@@ -444,9 +414,7 @@ class Skills(Base):
     params = Column(JSON)

     created_at = Column(DateTime, default=datetime.datetime.now)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     deleted_at = Column(DateTime)

     def as_dict(self) -> Dict:
@@ -475,9 +443,7 @@ class Agents(Base):
     provider = Column(String, nullable=True)
     params = Column(JSON)

-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at = Column(DateTime, default=datetime.datetime.now)
     deleted_at = Column(DateTime)

@@ -520,33 +486,41 @@ class KnowledgeBase(Base):
         doc="fk to the embedding model",
     )

-    embedding_model = relationship(
-        "Predictor", foreign_keys=[embedding_model_id], doc="embedding model"
-    )
+    embedding_model = relationship("Predictor", foreign_keys=[embedding_model_id], doc="embedding model")
     query_id = Column(Integer, nullable=True)

     created_at = Column(DateTime, default=datetime.datetime.now)
-    updated_at = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)

-    __table_args__ = (
-        UniqueConstraint(
-            "name", "project_id", name="unique_knowledge_base_name_project_id"
-        ),
-    )
+    __table_args__ = (UniqueConstraint("name", "project_id", name="unique_knowledge_base_name_project_id"),)
+
+    def as_dict(self, with_secrets: Optional[bool] = True) -> Dict:
+        params = self.params.copy()
+        embedding_model = params.pop("embedding_model", None)
+        reranking_model = params.pop("reranking_model", None)
+
+        if not with_secrets:
+            if embedding_model and "api_key" in embedding_model:
+                embedding_model["api_key"] = "******"
+
+            if reranking_model and "api_key" in reranking_model:
+                reranking_model["api_key"] = "******"

-    def as_dict(self) -> Dict:
         return {
             "id": self.id,
             "name": self.name,
             "project_id": self.project_id,
-            "embedding_model": None if self.embedding_model is None else self.embedding_model.name,
             "vector_database": None if self.vector_database is None else self.vector_database.name,
             "vector_database_table": self.vector_database_table,
             "updated_at": self.updated_at,
             "created_at": self.created_at,
-            "params": self.params
+            "query_id": self.query_id,
+            "embedding_model": embedding_model,
+            "reranking_model": reranking_model,
+            "metadata_columns": params.pop("metadata_columns", None),
+            "content_columns": params.pop("content_columns", None),
+            "id_column": params.pop("id_column", None),
+            "params": params,
         }


@@ -559,9 +533,7 @@ class QueryContext(Base):
     context_name: str = Column(String, nullable=False)
     values: dict = Column(JSON)

-    updated_at: datetime.datetime = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now)


@@ -581,9 +553,7 @@ class Queries(Base):
     processed_rows = Column(Integer, default=0)
     error: str = Column(String, nullable=True)

-    updated_at: datetime.datetime = Column(
-        DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now
-    )
+    updated_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
     created_at: datetime.datetime = Column(DateTime, default=datetime.datetime.now)


@@ -610,10 +580,11 @@ class LLMLog(Base):


 class LLMData(Base):
-    '''
+    """
     Stores the question/answer pairs of an LLM call so examples can be used
     for self improvement with DSPy
-    '''
+    """
+
     __tablename__ = "llm_data"
     id: int = Column(Integer, primary_key=True)
     input: str = Column(String, nullable=False)
@@ -621,3 +592,192 @@ class LLMData(Base):
     model_id: int = Column(Integer, nullable=False)
     created_at: datetime = Column(DateTime, default=datetime.datetime.now)
     updated_at: datetime = Column(DateTime, onupdate=datetime.datetime.now)
+
+
+# Data Catalog
+class MetaTables(Base):
+    __tablename__ = "meta_tables"
+    id: int = Column(Integer, primary_key=True)
+
+    integration_id: int = Column(Integer, ForeignKey("integration.id"))
+    integration = relationship("Integration", back_populates="meta_tables")
+
+    name: str = Column(String, nullable=False)
+    schema: str = Column(String, nullable=True)
+    description: str = Column(String, nullable=True)
+    type: str = Column(String, nullable=True)
+    row_count: int = Column(BigInteger, nullable=True)
+
+    meta_columns: Mapped[List["MetaColumns"]] = relationship("MetaColumns", back_populates="meta_tables")
+    meta_primary_keys: Mapped[List["MetaPrimaryKeys"]] = relationship("MetaPrimaryKeys", back_populates="meta_tables")
+    meta_foreign_keys_parents: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.parent_table_id", back_populates="parent_table"
+    )
+    meta_foreign_keys_children: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.child_table_id", back_populates="child_table"
+    )
+
+    def as_string(self, indent: int = 0) -> str:
+        pad = " " * indent
+
+        table_info = f"`{self.integration.name}`.`{self.name}` ({self.type})"
+
+        if self.description:
+            table_info += f" : {self.description}"
+
+        if self.schema:
+            table_info += f"\n{pad}Schema: {self.schema}"
+
+        if self.row_count and self.row_count > 0:
+            table_info += f"\n{pad}Estimated Row Count: {self.row_count}"
+
+        if self.meta_primary_keys:
+            table_info += f"\n{pad}Primary Keys (in defined order): {', '.join([pk.as_string() for pk in self.meta_primary_keys])}"
+
+        if self.meta_columns:
+            table_info += f"\n\n{pad}Columns:"
+            for index, column in enumerate(self.meta_columns, start=1):
+                table_info += f"\n{index}. {column.as_string(indent + 4)}\n"
+
+        if self.meta_foreign_keys_children:
+            table_info += f"\n\n{pad}Key Relationships:"
+            for fk in self.meta_foreign_keys_children:
+                table_info += f"\n{pad} {fk.as_string()}"
+
+        return table_info
+
+
+class MetaColumns(Base):
+    __tablename__ = "meta_columns"
+    id: int = Column(Integer, primary_key=True)
+
+    table_id: int = Column(Integer, ForeignKey("meta_tables.id"))
+    meta_tables = relationship("MetaTables", back_populates="meta_columns")
+
+    name: str = Column(String, nullable=False)
+    data_type: str = Column(String, nullable=False)
+    description: str = Column(String, nullable=True)
+    default_value: str = Column(String, nullable=True)
+    is_nullable: bool = Column(Boolean, nullable=True)
+
+    meta_column_statistics: Mapped[List["MetaColumnStatistics"]] = relationship(
+        "MetaColumnStatistics", back_populates="meta_columns"
+    )
+    meta_primary_keys: Mapped[List["MetaPrimaryKeys"]] = relationship("MetaPrimaryKeys", back_populates="meta_columns")
+    meta_foreign_keys_parents: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.parent_column_id", back_populates="parent_column"
+    )
+    meta_foreign_keys_children: Mapped[List["MetaForeignKeys"]] = relationship(
+        "MetaForeignKeys", foreign_keys="MetaForeignKeys.child_column_id", back_populates="child_column"
+    )
+
+    def as_string(self, indent: int = 0) -> str:
+        pad = " " * indent
+
+        column_info = f"{self.name} ({self.data_type}):"
+        if self.description:
+            column_info += f"\n{pad}Description: {self.description}"
+
+        if self.is_nullable:
+            column_info += f"\n{pad}- Nullable: Yes"
+
+        if self.default_value:
+            column_info += f"\n{pad}- Default Value: {self.default_value}"
+
+        if self.meta_column_statistics:
+            column_info += f"\n\n{pad}- Column Statistics:"
+            column_info += f"\n{self.meta_column_statistics[0].as_string(indent + 4)}"
+
+        return column_info
+
+
+class MetaColumnStatistics(Base):
+    __tablename__ = "meta_column_statistics"
+    column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    meta_columns = relationship("MetaColumns", back_populates="meta_column_statistics")
+
+    most_common_values: str = Column(Array, nullable=True)
+    most_common_frequencies: str = Column(Array, nullable=True)
+    null_percentage: float = Column(Numeric(5, 2), nullable=True)
+    distinct_values_count: int = Column(BigInteger, nullable=True)
+    minimum_value: str = Column(String, nullable=True)
+    maximum_value: str = Column(String, nullable=True)
+
+    def as_string(self, indent: int = 0) -> str:
+        pad = " " * indent
+        inner_pad = " " * (indent + 4)
+
+        column_statistics = ""
+
+        if any(self.most_common_values) and any(self.most_common_frequencies):
+            column_statistics += f"{pad}- Top 10 Most Common Values and Frequencies:"
+            for i in range(min(10, len(self.most_common_values))):
+                freq = self.most_common_frequencies[i]
+                try:
+                    percent = float(freq) * 100
+                    freq_str = f"{percent:.2f}%"
+                except (ValueError, TypeError):
+                    freq_str = str(freq)
+
+                column_statistics += f"\n{inner_pad}- {self.most_common_values[i]}: {freq_str}"
+            column_statistics += "\n"
+
+        if self.null_percentage:
+            column_statistics += f"{pad}- Null Percentage: {self.null_percentage}\n"
+
+        if self.distinct_values_count:
+            column_statistics += f"{pad}- No. of Distinct Values: {self.distinct_values_count}\n"
+
+        if self.minimum_value:
+            column_statistics += f"{pad}- Minimum Value: {self.minimum_value}\n"
+
+        if self.maximum_value:
+            column_statistics += f"{pad}- Maximum Value: {self.maximum_value}"
+
+        return column_statistics
+
+
+class MetaPrimaryKeys(Base):
+    __tablename__ = "meta_primary_keys"
+    table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
+    meta_tables = relationship("MetaTables", back_populates="meta_primary_keys")
+
+    column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    meta_columns = relationship("MetaColumns", back_populates="meta_primary_keys")
+
+    ordinal_position: int = Column(Integer, nullable=True)
+    constraint_name: str = Column(String, nullable=True)
+
+    def as_string(self) -> str:
+        pk_list = sorted(
+            self.meta_tables.meta_primary_keys,
+            key=lambda pk: pk.ordinal_position if pk.ordinal_position is not None else 0,
+        )
+
+        return ", ".join(f"{pk.meta_columns.name} ({pk.meta_columns.data_type})" for pk in pk_list)
+
+
+class MetaForeignKeys(Base):
+    __tablename__ = "meta_foreign_keys"
+    parent_table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
+    parent_table = relationship(
+        "MetaTables", back_populates="meta_foreign_keys_parents", foreign_keys=[parent_table_id]
+    )
+
+    parent_column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    parent_column = relationship(
+        "MetaColumns", back_populates="meta_foreign_keys_parents", foreign_keys=[parent_column_id]
+    )
+
+    child_table_id: int = Column(Integer, ForeignKey("meta_tables.id"), primary_key=True)
+    child_table = relationship("MetaTables", back_populates="meta_foreign_keys_children", foreign_keys=[child_table_id])
+
+    child_column_id: int = Column(Integer, ForeignKey("meta_columns.id"), primary_key=True)
+    child_column = relationship(
+        "MetaColumns", back_populates="meta_foreign_keys_children", foreign_keys=[child_column_id]
+    )
+
+    constraint_name: str = Column(String, nullable=True)
+
+    def as_string(self) -> str:
+        return f"{self.child_column.name} in {self.child_table.name} references {self.parent_column.name} in {self.parent_table.name}"

mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py

@@ -0,0 +1,118 @@
+"""added data catalog tables
+
+Revision ID: a44643042fe8
+Revises: 9f150e4f9a05
+Create Date: 2025-05-28 17:20:57.300313
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+from mindsdb.interfaces.storage.db import Array
+
+
+# revision identifiers, used by Alembic.
+revision = "a44643042fe8"
+down_revision = "9f150e4f9a05"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.create_table(
+        "meta_tables",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column(
+            "integration_id",
+            sa.Integer(),
+            sa.ForeignKey("integration.id"),
+            nullable=False,
+        ),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("schema", sa.String(), nullable=True),
+        sa.Column("description", sa.String(), nullable=True),
+        sa.Column("type", sa.String(), nullable=True),
+        sa.Column("row_count", sa.Integer(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_columns",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("data_type", sa.String(), nullable=False),
+        sa.Column("default_value", sa.String(), nullable=True),
+        sa.Column("description", sa.String(), nullable=True),
+        sa.Column("is_nullable", sa.Boolean(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_column_statistics",
+        sa.Column(
+            "column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column("most_common_values", Array(), nullable=True),
+        sa.Column("most_common_frequencies", Array(), nullable=True),
+        sa.Column("null_percentage", sa.Numeric(5, 2), nullable=True),
+        sa.Column("distinct_values_count", sa.Integer(), nullable=True),
+        sa.Column("minimum_value", sa.String(), nullable=True),
+        sa.Column("maximum_value", sa.String(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_primary_keys",
+        sa.Column("table_id", sa.Integer(), sa.ForeignKey("meta_tables.id"), primary_key=True),
+        sa.Column(
+            "column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column("ordinal_position", sa.Integer(), nullable=True),
+        sa.Column("constraint_name", sa.String(), nullable=True),
+    )
+
+    op.create_table(
+        "meta_foreign_keys",
+        sa.Column(
+            "parent_table_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_tables.id"),
+            primary_key=True,
+        ),
+        sa.Column(
+            "parent_column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column(
+            "child_table_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_tables.id"),
+            primary_key=True,
+        ),
+        sa.Column(
+            "child_column_id",
+            sa.Integer(),
+            sa.ForeignKey("meta_columns.id"),
+            primary_key=True,
+        ),
+        sa.Column("constraint_name", sa.String(), nullable=True),
+    )
+
+
+def downgrade():
+    op.drop_table("meta_tables")
+
+    op.drop_table("meta_columns")
+
+    op.drop_table("meta_column_statistics")
+
+    op.drop_table("meta_primary_keys")
+
+    op.drop_table("meta_foreign_keys")

mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py

@@ -0,0 +1,58 @@
+"""updated data catalog data types
+
+Revision ID: 608e376c19a7
+Revises: a44643042fe8
+Create Date: 2025-06-09 23:20:34.739735
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db  # noqa
+
+
+# revision identifiers, used by Alembic.
+revision = "608e376c19a7"
+down_revision = "a44643042fe8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    with op.batch_alter_table("meta_tables", schema=None) as batch_op:
+        batch_op.alter_column(
+            "row_count",
+            type_=sa.BigInteger(),
+            existing_type=sa.Integer(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
+
+    with op.batch_alter_table("meta_column_statistics", schema=None) as batch_op:
+        batch_op.alter_column(
+            "distinct_values_count",
+            type_=sa.BigInteger(),
+            existing_type=sa.Integer(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
+
+
+def downgrade():
+    with op.batch_alter_table("meta_tables", schema=None) as batch_op:
+        batch_op.alter_column(
+            "row_count",
+            type_=sa.Integer(),
+            existing_type=sa.BigInteger(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
+
+    with op.batch_alter_table("meta_column_statistics", schema=None) as batch_op:
+        batch_op.alter_column(
+            "distinct_values_count",
+            type_=sa.Integer(),
+            existing_type=sa.BigInteger(),
+            existing_nullable=True,
+            existing_server_default=None,
+        )
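
Taken together, the two new revisions chain 9f150e4f9a05 → a44643042fe8 → 608e376c19a7. MindsDB normally applies pending migrations itself on startup, but for reference here is a hedged sketch of driving the same upgrade/downgrade path through Alembic's standard command API; the "alembic.ini" path is an assumption about the local checkout, not something shipped in this diff.

from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")  # assumed location of the project's Alembic configuration

command.upgrade(cfg, "608e376c19a7")    # create the catalog tables, then widen the counters
command.downgrade(cfg, "a44643042fe8")  # roll back only the BigInteger widening
command.downgrade(cfg, "9f150e4f9a05")  # drop the data catalog tables entirely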