MindsDB 25.6.2.0__py3-none-any.whl → 25.6.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of MindsDB has been flagged as potentially problematic.
Files changed (30)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +25 -4
  3. mindsdb/api/a2a/task_manager.py +68 -6
  4. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
  5. mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
  6. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
  7. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  8. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +3 -0
  9. mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
  10. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +94 -8
  11. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +19 -1
  12. mindsdb/integrations/libs/api_handler.py +19 -1
  13. mindsdb/integrations/libs/base.py +86 -2
  14. mindsdb/interfaces/agents/agents_controller.py +32 -6
  15. mindsdb/interfaces/agents/constants.py +1 -0
  16. mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
  17. mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -6
  18. mindsdb/interfaces/data_catalog/data_catalog_reader.py +4 -0
  19. mindsdb/interfaces/database/integrations.py +4 -2
  20. mindsdb/interfaces/knowledge_base/controller.py +3 -15
  21. mindsdb/interfaces/knowledge_base/evaluate.py +0 -3
  22. mindsdb/interfaces/skills/skills_controller.py +0 -23
  23. mindsdb/interfaces/skills/sql_agent.py +8 -4
  24. mindsdb/interfaces/storage/db.py +20 -4
  25. mindsdb/utilities/config.py +5 -1
  26. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +250 -250
  27. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +30 -30
  28. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
  29. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
  30. {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0

mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py

@@ -70,8 +70,8 @@ class SalesforceHandler(MetaAPIHandler):
         )
         self.is_connected = True

-        # Register Salesforce tables.
-        for resource_name in self._get_resource_names():
+        resource_tables = self._get_resource_names()
+        for resource_name in resource_tables:
             table_class = create_table_class(resource_name)
             self._register_table(resource_name, table_class(self))

@@ -154,23 +154,109 @@ class SalesforceHandler(MetaAPIHandler):

         return response

-    def _get_resource_names(self) -> None:
+    def _get_resource_names(self) -> List[str]:
         """
-        Retrieves the names of the Salesforce resources.
-
+        Retrieves the names of the Salesforce resources, with more aggressive filtering to remove tables.
         Returns:
-            None
+            List[str]: A list of filtered resource names.
         """
         if not self.resource_names:
-            # Fetch the queryable list of Salesforce resources (sobjects).
-            self.resource_names = [
+            all_resources = [
                 resource["name"]
                 for resource in self.connection.sobjects.describe()["sobjects"]
                 if resource.get("queryable", False)
             ]

+            # Define patterns for tables to be filtered out.
+            # Expanded suffixes and prefixes and exact matches
+            ignore_suffixes = ("Share", "History", "Feed", "ChangeEvent", "Tag", "Permission", "Setup", "Consent")
+            ignore_prefixes = (
+                "Apex",
+                "CommPlatform",
+                "Lightning",
+                "Flow",
+                "Transaction",
+                "AI",
+                "Aura",
+                "ContentWorkspace",
+                "Collaboration",
+                "Datacloud",
+            )
+            ignore_exact = {
+                "EntityDefinition",
+                "FieldDefinition",
+                "RecordType",
+                "CaseStatus",
+                "UserRole",
+                "UserLicense",
+                "UserPermissionAccess",
+                "UserRecordAccess",
+                "Folder",
+                "Group",
+                "Note",
+                "ProcessDefinition",
+                "ProcessInstance",
+                "ContentFolder",
+                "ContentDocumentSubscription",
+                "DashboardComponent",
+                "Report",
+                "Dashboard",
+                "Topic",
+                "TopicAssignment",
+                "Period",
+                "Partner",
+                "PackageLicense",
+                "ColorDefinition",
+                "DataUsePurpose",
+                "DataUseLegalBasis",
+            }
+
+            ignore_substrings = (
+                "CleanInfo",
+                "Template",
+                "Rule",
+                "Definition",
+                "Status",
+                "Policy",
+                "Setting",
+                "Access",
+                "Config",
+                "Subscription",
+                "DataType",
+                "MilestoneType",
+                "Entitlement",
+                "Auth",
+            )
+
+            filtered = []
+            for r in all_resources:
+                if (
+                    not r.endswith(ignore_suffixes)
+                    and not r.startswith(ignore_prefixes)
+                    and not any(sub in r for sub in ignore_substrings)
+                    and r not in ignore_exact
+                ):
+                    filtered.append(r)
+
+            self.resource_names = [r for r in filtered]
         return self.resource_names

+    def meta_get_handler_info(self, **kwargs) -> str:
+        """
+        Retrieves information about the design and implementation of the API handler.
+        This should include, but not be limited to, the following:
+        - The type of SQL queries and operations that the handler supports.
+        - etc.
+
+        Args:
+            kwargs: Additional keyword arguments that may be used in generating the handler information.
+
+        Returns:
+            str: A string containing information about the API handler's design and implementation.
+        """
+        # TODO: Relationships? Aliases?
+        return "When filtering on a Date or DateTime field, the value MUST be an unquoted literal in YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ format. For example, CloseDate >= 2025-05-28 is correct; CloseDate >= '2025-05-28' is incorrect."
+
     def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
         """
         Retrieves metadata for the specified tables (or all tables if no list is provided).
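
Aside on the filtering pattern in this hunk: str.endswith and str.startswith accept a tuple of candidates, which is what lets a single call cover a whole family of suffixes or prefixes. A minimal standalone sketch of the same approach (the sample resource names below are hypothetical, not taken from the diff):

    # Sketch of the suffix/prefix/substring/exact-match filtering used in
    # _get_resource_names. Sample names are illustrative only.
    ignore_suffixes = ("Share", "History", "Feed")
    ignore_prefixes = ("Apex", "Aura")
    ignore_substrings = ("Template", "Rule")
    ignore_exact = {"RecordType", "UserRole"}

    resources = ["Account", "AccountShare", "ApexClass", "EmailTemplate", "RecordType", "Opportunity"]

    filtered = [
        r
        for r in resources
        if not r.endswith(ignore_suffixes)  # endswith/startswith take a tuple of options
        and not r.startswith(ignore_prefixes)
        and not any(sub in r for sub in ignore_substrings)
        and r not in ignore_exact
    ]
    print(filtered)  # ['Account', 'Opportunity']

Note that the release's final `self.resource_names = [r for r in filtered]` is a redundant copy; a plain assignment would behave the same.
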

mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py

@@ -6,7 +6,7 @@ from snowflake.sqlalchemy import snowdialect
 from snowflake import connector
 from snowflake.connector.errors import NotSupportedError
 from snowflake.connector.cursor import SnowflakeCursor, ResultMetadata
-from typing import Optional, List
+from typing import Any, Optional, List

 from mindsdb_sql_parser.ast.base import ASTNode
 from mindsdb_sql_parser.ast import Select, Identifier
@@ -706,3 +706,21 @@ class SnowflakeHandler(MetaDatabaseHandler):
         except Exception as e:
             logger.error(f"Exception in meta_get_primary_keys: {e!r}")
             return Response(RESPONSE_TYPE.ERROR, error_message=f"Exception querying primary keys: {e!r}")
+
+    def meta_get_handler_info(self, **kwargs: Any) -> str:
+        """
+        Retrieves information about the design and implementation of the database handler.
+        This should include, but not be limited to, the following:
+        - The type of SQL queries and operations that the handler supports.
+        - etc.
+
+        Args:
+            kwargs: Additional keyword arguments that may be used in generating the handler information.
+
+        Returns:
+            str: A string containing information about the database handler's design and implementation.
+        """
+        return (
+            "To query columns that contain special characters, use ticks around the column name, e.g. `column name`.\n"
+            "DO NOT use double quotes for this purpose."
+        )

mindsdb/integrations/libs/api_handler.py

@@ -457,8 +457,11 @@ class APIHandler(BaseHandler):

     def query(self, query: ASTNode):
         if isinstance(query, Select):
+            # If the list method exists, it should be overridden in the child class.
+            # The APIResource class could be used as a base class by overriding the select method, but not the list method.
             table = self._get_table(query.from_table)
-            if not hasattr(table, "list"):
+            list_method = getattr(table, "list", None)
+            if not list_method or (list_method and list_method.__func__ is APIResource.list):
                 # for back compatibility, targets wasn't passed in previous version
                 query.targets = [Star()]
             result = self._get_table(query.from_table).select(query)
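
The replacement comparison works because a bound method's __func__ attribute is the underlying function object, so an identity check against the base class's function tells an inherited default apart from a genuine override — something hasattr cannot do. A minimal sketch of the pattern, with hypothetical Base/Child classes standing in for APIResource and its subclasses:

    # Sketch: detect whether a subclass actually overrides a base method.
    class Base:
        def list(self):
            raise NotImplementedError

    class Child(Base):
        def list(self):
            return ["row"]

    class Lazy(Base):
        pass  # inherits Base.list unchanged

    def overrides_list(obj) -> bool:
        method = getattr(obj, "list", None)
        # A bound method's __func__ is the underlying function object;
        # if it is still Base.list, the subclass did not override it.
        return method is not None and method.__func__ is not Base.list

    print(overrides_list(Child()))  # True
    print(overrides_list(Lazy()))   # False

Incidentally, the `(list_method and ...)` guard in the release is redundant after the `not list_method` test, though harmless.
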
@@ -515,6 +518,21 @@ class MetaAPIHandler(APIHandler):
     This class is used when the handler is also needed to store information in the data catalog.
     """

+    def meta_get_handler_info(self, **kwargs) -> str:
+        """
+        Retrieves information about the design and implementation of the API handler.
+        This should include, but not be limited to, the following:
+        - The type of SQL queries and operations that the handler supports.
+        - etc.
+
+        Args:
+            kwargs: Additional keyword arguments that may be used in generating the handler information.
+
+        Returns:
+            str: A string containing information about the API handler's design and implementation.
+        """
+        pass
+
     def meta_get_tables(self, table_names: Optional[List[str]] = None, **kwargs) -> Response:
         """
         Retrieves metadata for the specified tables (or all tables if no list is provided).

mindsdb/integrations/libs/base.py

@@ -1,4 +1,5 @@
 import ast
+import concurrent.futures
 import inspect
 import textwrap
 from _ast import AnnAssign, AugAssign
@@ -8,7 +9,7 @@ import pandas as pd
 from mindsdb_sql_parser.ast.base import ASTNode
 from mindsdb.utilities import log

-from mindsdb.integrations.libs.response import HandlerResponse, HandlerStatusResponse
+from mindsdb.integrations.libs.response import HandlerResponse, HandlerStatusResponse, RESPONSE_TYPE

 logger = log.getLogger(__name__)

@@ -156,6 +157,7 @@ class MetaDatabaseHandler(DatabaseHandler):
     def meta_get_column_statistics(self, table_names: Optional[List[str]]) -> HandlerResponse:
         """
         Returns metadata statisical information about the columns in the tables to be stored in the data catalog.
+        Either this method should be overridden in the handler or `meta_get_column_statistics_for_table` should be implemented.

         Returns:
             HandlerResponse: The response should consist of the following columns:
@@ -168,7 +170,74 @@ class MetaDatabaseHandler(DatabaseHandler):
             - MAXIMUM_VALUE (str): Maximum value in the column (optional).
             - DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional).
         """
-        raise NotImplementedError()
+        method = getattr(self, "meta_get_column_statistics_for_table")
+        if method.__func__ is not MetaDatabaseHandler.meta_get_column_statistics_for_table:
+            meta_columns = self.meta_get_columns(table_names)
+            grouped_columns = (
+                meta_columns.data_frame.groupby("table_name")
+                .agg(
+                    {
+                        "column_name": list,
+                    }
+                )
+                .reset_index()
+            )
+
+            executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
+            futures = []
+
+            results = []
+            with executor:
+                for _, row in grouped_columns.iterrows():
+                    table_name = row["table_name"]
+                    columns = row["column_name"]
+                    futures.append(executor.submit(self.meta_get_column_statistics_for_table, table_name, columns))
+
+                for future in concurrent.futures.as_completed(futures):
+                    try:
+                        result = future.result(timeout=120)
+                        if result.resp_type == RESPONSE_TYPE.TABLE:
+                            results.append(result.data_frame)
+                        else:
+                            logger.error(
+                                f"Error retrieving column statistics for table {table_name}: {result.error_message}"
+                            )
+                    except Exception as e:
+                        logger.error(f"Exception occurred while retrieving column statistics for table {table_name}: {e}")
+
+            if not results:
+                logger.warning("No column statistics could be retrieved for the specified tables.")
+                return HandlerResponse(RESPONSE_TYPE.ERROR, error_message="No column statistics could be retrieved.")
+            return HandlerResponse(
+                RESPONSE_TYPE.TABLE, pd.concat(results, ignore_index=True) if results else pd.DataFrame()
+            )
+
+        else:
+            raise NotImplementedError()
+
+    def meta_get_column_statistics_for_table(
+        self, table_name: str, column_names: Optional[List[str]] = None
+    ) -> HandlerResponse:
+        """
+        Returns metadata statistical information about the columns in a specific table to be stored in the data catalog.
+        Either this method should be implemented in the handler or `meta_get_column_statistics` should be overridden.
+
+        Args:
+            table_name (str): Name of the table.
+            column_names (Optional[List[str]]): List of column names to retrieve statistics for. If None, statistics for all columns will be returned.
+
+        Returns:
+            HandlerResponse: The response should consist of the following columns:
+            - TABLE_NAME (str): Name of the table.
+            - COLUMN_NAME (str): Name of the column.
+            - MOST_COMMON_VALUES (List[str]): Most common values in the column (optional).
+            - MOST_COMMON_FREQUENCIES (List[str]): Frequencies of the most common values in the column (optional).
+            - NULL_PERCENTAGE: Percentage of NULL values in the column (optional).
+            - MINIMUM_VALUE (str): Minimum value in the column (optional).
+            - MAXIMUM_VALUE (str): Maximum value in the column (optional).
+            - DISTINCT_VALUES_COUNT (int): Count of distinct values in the column (optional).
+        """
+        pass

     def meta_get_primary_keys(self, table_names: Optional[List[str]]) -> HandlerResponse:
         """
@@ -197,6 +266,21 @@ class MetaDatabaseHandler(DatabaseHandler):
         """
         raise NotImplementedError()

+    def meta_get_handler_info(self, **kwargs) -> str:
+        """
+        Retrieves information about the design and implementation of the database handler.
+        This should include, but not be limited to, the following:
+        - The type of SQL queries and operations that the handler supports.
+        - etc.
+
+        Args:
+            kwargs: Additional keyword arguments that may be used in generating the handler information.
+
+        Returns:
+            str: A string containing information about the database handler's design and implementation.
+        """
+        pass
+

 class ArgProbeMixin:
     """

mindsdb/interfaces/agents/agents_controller.py

@@ -10,6 +10,7 @@ import pandas as pd
 from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.storage.db import Predictor
 from mindsdb.utilities.context import context as ctx
+from mindsdb.interfaces.data_catalog.data_catalog_loader import DataCatalogLoader
 from mindsdb.interfaces.database.projects import ProjectController
 from mindsdb.interfaces.model.functions import PredictorRecordNotFound
 from mindsdb.interfaces.model.model_controller import ModelController
@@ -52,7 +53,7 @@ class AgentsController:
         """
         Checks if a model exists, and gets the provider of the model.

-        The provider is either the provider of the model, or the provider given as an argument.
+        The provider is either the provider of the model or the provider given as an argument.

         Parameters:
             model_name (str): The name of the model
@@ -325,12 +326,37 @@ class AgentsController:
                 db.session.rollback()
                 raise ValueError(f"Skill with name does not exist: {skill_name}")

-            # Add table restrictions if this is a text2sql skill
-            if existing_skill.type == "sql" and (include_tables or ignore_tables):
-                parameters["tables"] = include_tables or ignore_tables
-
-            # Add knowledge base restrictions if this is a text2sql skill
             if existing_skill.type == "sql":
+                # Run Data Catalog loader if enabled
+                if config.get("data_catalog", {}).get("enabled", False):
+                    if include_tables:
+                        database_table_map = {}
+                        for table in include_tables:
+                            parts = table.split(".", 1)
+                            database_table_map[parts[0]] = database_table_map.get(parts[0], []) + [parts[1]]
+
+                        for database_name, table_names in database_table_map.items():
+                            data_catalog_loader = DataCatalogLoader(
+                                database_name=database_name, table_names=table_names
+                            )
+                            data_catalog_loader.load_metadata()
+
+                    elif "database" in existing_skill.params:
+                        data_catalog_loader = DataCatalogLoader(
+                            database_name=existing_skill.params["database"],
+                            table_names=parameters["tables"] if "tables" in parameters else None,
+                        )
+                        data_catalog_loader.load_metadata()
+
+                    else:
+                        raise ValueError(
+                            "Data Catalog loading is enabled, but the provided parameters are insufficient to load metadata. "
+                        )
+
+                # Add table restrictions if this is a text2sql skill
+                if include_tables or ignore_tables:
+                    parameters["tables"] = include_tables or ignore_tables
+
                 # Pass database parameter if provided
                 if database and "database" not in parameters:
                     parameters["database"] = database

mindsdb/interfaces/agents/constants.py

@@ -221,6 +221,7 @@ You are an AI assistant powered by MindsDB. When answering questions, follow the
 2. For questions about database tables and their contents:
    - Use the sql_db_query to query the tables directly
    - You can join tables if needed to get comprehensive information
+   - You are running on a federated query engine, so joins across multiple databases are allowed and supported
    - **Important Rule for SQL Queries:** If you formulate an SQL query as part of answering a user's question, you *must* then use the `sql_db_query` tool to execute that query and get its results. The SQL query string itself is NOT the final answer to the user unless the user has specifically asked for the query. Your final AI response should be based on the *results* obtained from executing the query.


mindsdb/interfaces/agents/mindsdb_database_agent.py

@@ -1,7 +1,8 @@
 """
-Wrapper around MindsDB's executor and integration controller following the implementation of the original
-langchain.sql_database.SQLDatabase class to partly replicate its behavior.
+Wrapper around MindsDB's executor and integration controller following the implementation of the original
+langchain.sql_database.SQLDatabase class to partly replicate its behavior.
 """
+
 import traceback
 from typing import Any, Iterable, List, Optional

@@ -13,26 +14,25 @@ logger = log.getLogger(__name__)


 def extract_essential(input: str) -> str:
-    """ Sometimes LLM include to input unnecessary data. We can't control stochastic nature of LLM, so we need to
-    'clean' input somehow. LLM prompt contains instruction to enclose input between '$START$' and '$STOP$'.
+    """Sometimes LLM include to input unnecessary data. We can't control stochastic nature of LLM, so we need to
+    'clean' input somehow. LLM prompt contains instruction to enclose input between '$START$' and '$STOP$'.
     """
-    if '$START$' in input:
-        input = input.partition('$START$')[-1]
-    if '$STOP$' in input:
-        input = input.partition('$STOP$')[0]
-    return input.strip(' ')
+    if "$START$" in input:
+        input = input.partition("$START$")[-1]
+    if "$STOP$" in input:
+        input = input.partition("$STOP$")[0]
+    return input.strip(" ")


 class MindsDBSQL(SQLDatabase):
     @staticmethod
-    def custom_init(
-        sql_agent: 'SQLAgent'
-    ) -> 'MindsDBSQL':
+    def custom_init(sql_agent: "SQLAgent") -> "MindsDBSQL":
         instance = MindsDBSQL()
         instance._sql_agent = sql_agent
         return instance

     """ Can't modify signature, as LangChain does a Pydantic check."""
+
     def __init__(
         self,
         engine: Optional[Any] = None,
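
For reference, the delimiter trimming in extract_essential behaves as below; the function body is reproduced from the hunk so the snippet runs standalone:

    def extract_essential(text: str) -> str:
        # Same partition-based trimming as in the diff, shown for illustration.
        if "$START$" in text:
            text = text.partition("$START$")[-1]
        if "$STOP$" in text:
            text = text.partition("$STOP$")[0]
        return text.strip(" ")

    print(extract_essential("Sure! $START$ SELECT 1 $STOP$ Hope that helps."))
    # -> 'SELECT 1'
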
@@ -51,7 +51,7 @@ class MindsDBSQL(SQLDatabase):

     @property
     def dialect(self) -> str:
-        return 'mindsdb'
+        return "mindsdb"

     @property
     def table_info(self) -> str:
@@ -93,23 +93,26 @@ class MindsDBSQL(SQLDatabase):
         command = extract_essential(command)

         try:
-
             # Log the query for debugging
             logger.info(f"Executing SQL query: {command}")

+            # Removing backticks causes in query execution.
             # remove backticks
-            command = command.replace('`', '')
+            # command = command.replace('`', '')

             # Parse the SQL string to an AST object first
             from mindsdb_sql_parser import parse_sql
+
             ast_query = parse_sql(command)

             # Now execute the parsed query
-            result = self._sql_agent.skill_tool.get_command_executor().execute_command(ast_query, database_name="mindsdb")
+            result = self._sql_agent.skill_tool.get_command_executor().execute_command(
+                ast_query, database_name="mindsdb"
+            )

             # Convert ExecuteAnswer to a DataFrame for easier manipulation
             df = None
-            if hasattr(result, 'data') and hasattr(result.data, 'data_frame'):
+            if hasattr(result, "data") and hasattr(result.data, "data_frame"):
                 df = result.data.data_frame
             else:
                 # Fallback to to_df when data_frame attr not available
@@ -130,7 +133,9 @@ class MindsDBSQL(SQLDatabase):
         except Exception as e:
             logger.error(f"Error executing SQL command: {str(e)}\n{traceback.format_exc()}")
             # If this is a knowledge base query, provide a more helpful error message
-            if "knowledge_base" in command.lower() or any(kb in command for kb in self._sql_agent.get_usable_knowledge_base_names()):
+            if "knowledge_base" in command.lower() or any(
+                kb in command for kb in self._sql_agent.get_usable_knowledge_base_names()
+            ):
                 return f"Error executing knowledge base query: {str(e)}. Please check that the knowledge base exists and your query syntax is correct."
             return f"Error: {str(e)}"


mindsdb/interfaces/data_catalog/data_catalog_loader.py

@@ -60,15 +60,20 @@ class DataCatalogLoader(BaseDataCatalog):
         """
         self.logger.info(f"Loading tables for {self.database_name}")
         response = self.data_handler.meta_get_tables(self.table_names)
-        if response.resp_type != RESPONSE_TYPE.TABLE:
+        if response.resp_type == RESPONSE_TYPE.ERROR:
             self.logger.error(f"Failed to load tables for {self.database_name}: {response.error_message}")
             return []
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No tables found for {self.database_name}.")
+            return []

         df = response.data_frame
         if df.empty:
             self.logger.info(f"No tables to add for {self.database_name}.")
             return []

+        df.columns = df.columns.str.lower()
+
         # Filter out tables that are already loaded in the data catalog
         if loaded_table_names:
             df = df[~df["table_name"].isin(loaded_table_names)]
@@ -77,7 +82,6 @@ class DataCatalogLoader(BaseDataCatalog):
             self.logger.info(f"No new tables to load for {self.database_name}.")
             return []

-        df.columns = df.columns.str.lower()
         tables = self._add_table_metadata(df)
         self.logger.info(f"Tables loaded for {self.database_name}.")
         return tables
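
The loader methods in this file now treat RESPONSE_TYPE.ERROR (hard failure with a message) and RESPONSE_TYPE.OK (the call succeeded but returned no result set) as distinct early exits; only RESPONSE_TYPE.TABLE proceeds to the data frame. The same three-way dispatch appears in the columns, statistics, and key hunks below. A self-contained sketch of the pattern, with stand-in Response/ResponseType types rather than MindsDB's own classes:

    from dataclasses import dataclass
    from enum import Enum
    from typing import Optional

    class ResponseType(Enum):  # hypothetical stand-in for mindsdb's RESPONSE_TYPE
        TABLE = "table"
        OK = "ok"
        ERROR = "error"

    @dataclass
    class Response:  # hypothetical stand-in for HandlerResponse
        resp_type: ResponseType
        data_frame: Optional[list] = None
        error_message: Optional[str] = None

    def load_tables(response: Response) -> list:
        if response.resp_type == ResponseType.ERROR:
            print(f"failed: {response.error_message}")  # hard failure: surface the message
            return []
        if response.resp_type == ResponseType.OK:
            print("call succeeded but returned no result set")
            return []
        return response.data_frame or []  # TABLE: safe to consume rows

    print(load_tables(Response(ResponseType.OK)))                 # []
    print(load_tables(Response(ResponseType.TABLE, ["orders"])))  # ['orders']
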
@@ -117,9 +121,12 @@ class DataCatalogLoader(BaseDataCatalog):
         """
         self.logger.info(f"Loading columns for {self.database_name}")
         response = self.data_handler.meta_get_columns(self.table_names)
-        if response.resp_type != RESPONSE_TYPE.TABLE:
+        if response.resp_type == RESPONSE_TYPE.ERROR:
             self.logger.error(f"Failed to load columns for {self.database_name}: {response.error_message}")
             return []
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No columns found for {self.database_name}.")
+            return []

         df = response.data_frame
         if df.empty:
@@ -162,9 +169,12 @@ class DataCatalogLoader(BaseDataCatalog):
         """
         self.logger.info(f"Loading column statistics for {self.database_name}")
         response = self.data_handler.meta_get_column_statistics(self.table_names)
-        if response.resp_type != RESPONSE_TYPE.TABLE:
+        if response.resp_type == RESPONSE_TYPE.ERROR:
             self.logger.error(f"Failed to load column statistics for {self.database_name}: {response.error_message}")
             return
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No column statistics found for {self.database_name}.")
+            return

         df = response.data_frame
         if df.empty:
@@ -222,9 +232,12 @@ class DataCatalogLoader(BaseDataCatalog):
         """
         self.logger.info(f"Loading primary keys for {self.database_name}")
         response = self.data_handler.meta_get_primary_keys(self.table_names)
-        if response.resp_type != RESPONSE_TYPE.TABLE:
+        if response.resp_type == RESPONSE_TYPE.ERROR:
             self.logger.error(f"Failed to load primary keys for {self.database_name}: {response.error_message}")
             return
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No primary keys found for {self.database_name}.")
+            return

         df = response.data_frame
         if df.empty:
@@ -271,9 +284,12 @@ class DataCatalogLoader(BaseDataCatalog):
         """
         self.logger.info(f"Loading foreign keys for {self.database_name}")
         response = self.data_handler.meta_get_foreign_keys(self.table_names)
-        if response.resp_type != RESPONSE_TYPE.TABLE:
+        if response.resp_type == RESPONSE_TYPE.ERROR:
             self.logger.error(f"Failed to foreign keys for {self.database_name}: {response.error_message}")
             return
+        elif response.resp_type == RESPONSE_TYPE.OK:
+            self.logger.error(f"No foreign keys found for {self.database_name}.")
+            return

         df = response.data_frame
         if df.empty:

mindsdb/interfaces/data_catalog/data_catalog_reader.py

@@ -17,7 +17,11 @@ class DataCatalogReader(BaseDataCatalog):
         if not tables:
             self.logger.warning(f"No metadata found for database '{self.database_name}'")
             return f"No metadata found for database '{self.database_name}'"
+
         metadata_str = "Data Catalog: \n"
+        if hasattr(self.data_handler, "meta_get_handler_info"):
+            metadata_str += self.data_handler.meta_get_handler_info() + "\n\n"
+
         for table in tables:
             metadata_str += table.as_string() + "\n\n"
         return metadata_str
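
Because meta_get_handler_info is only defined on the Meta* base classes, the reader guards the call with hasattr, so handlers predating this release keep working. A small sketch of the guard, with illustrative handler classes:

    class LegacyHandler:
        pass  # no meta_get_handler_info: predates this release

    class ModernHandler:
        def meta_get_handler_info(self, **kwargs) -> str:
            return "Dates must be unquoted literals."

    def catalog_header(handler) -> str:
        text = "Data Catalog: \n"
        # Duck-typed guard: only call the hook when the handler provides it.
        if hasattr(handler, "meta_get_handler_info"):
            text += handler.meta_get_handler_info() + "\n\n"
        return text

    print(catalog_header(ModernHandler()))
    print(catalog_header(LegacyHandler()))
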

mindsdb/interfaces/database/integrations.py

@@ -256,7 +256,9 @@ class IntegrationController:

         # Remove the integration metadata from the data catalog (if enabled).
         # TODO: Can this be handled via cascading delete in the database?
-        if Config().get("data_catalog", {}).get("enabled", False):
+        if self.get_handler_meta(integration_record.engine).get("type") == HANDLER_TYPE.DATA and Config().get(
+            "data_catalog", {}
+        ).get("enabled", False):
             data_catalog_reader = DataCatalogLoader(database_name=name)
             data_catalog_reader.unload_metadata()

@@ -798,7 +800,7 @@ class IntegrationController:
         for item in code.body:
             if isinstance(item, ast.ClassDef):
                 bases = [base.id for base in item.bases]
-                if "APIHandler" in bases:
+                if "APIHandler" in bases or "MetaAPIHandler" in bases:
                     return "api"
         return "sql"


mindsdb/interfaces/knowledge_base/controller.py

@@ -1201,22 +1201,10 @@ class KnowledgeBaseController:
         project_names = {i.id: i.name for i in project_controller.get_list()}

         for record in query:
-            vector_database = record.vector_database
-            embedding_model = record.embedding_model
+            kb = record.as_dict(with_secrets=self.session.show_secrets)
+            kb["project_name"] = project_names[record.project_id]

-            data.append(
-                {
-                    "id": record.id,
-                    "name": record.name,
-                    "project_id": record.project_id,
-                    "project_name": project_names[record.project_id],
-                    "embedding_model": embedding_model.name if embedding_model is not None else None,
-                    "vector_database": None if vector_database is None else vector_database.name,
-                    "vector_database_table": record.vector_database_table,
-                    "query_id": record.query_id,
-                    "params": record.params,
-                }
-            )
+            data.append(kb)

         return data


mindsdb/interfaces/knowledge_base/evaluate.py

@@ -492,8 +492,6 @@ class EvaluateDocID(EvaluateBase):
         total_questions = len(stats)
         total_found = sum([1 for stat in stats if stat["doc_found"]])

-        total_accurately_retrieved = sum([1 for stat in stats if stat["doc_found"]])
-
         accurate_in_top_10 = sum([1 for stat in stats if stat["doc_found"] and stat["doc_position"] < 10])

         # calculate recall curve by position
@@ -512,7 +510,6 @@ class EvaluateDocID(EvaluateBase):
         return {
             "total": total_questions,
             "total_found": total_found,
-            "retrieved_in_top_k": total_accurately_retrieved,
             "retrieved_in_top_10": accurate_in_top_10,
             "cumulative_recall": cumulative_recall,
             "avg_query_time": avg_query_time,