MindsDB 25.7.1.0-py3-none-any.whl → 25.7.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of MindsDB might be problematic.

Files changed (38)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +54 -95
  3. mindsdb/api/a2a/agent.py +30 -206
  4. mindsdb/api/a2a/common/server/server.py +26 -27
  5. mindsdb/api/a2a/task_manager.py +93 -227
  6. mindsdb/api/a2a/utils.py +21 -0
  7. mindsdb/api/executor/command_executor.py +7 -2
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
  9. mindsdb/api/executor/utilities/sql.py +97 -21
  10. mindsdb/api/http/namespaces/agents.py +127 -202
  11. mindsdb/api/http/namespaces/config.py +12 -1
  12. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +11 -1
  13. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  14. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +94 -1
  15. mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
  16. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +4 -3
  17. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -3
  18. mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
  19. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
  20. mindsdb/integrations/libs/keyword_search_base.py +41 -0
  21. mindsdb/integrations/libs/vectordatabase_handler.py +35 -14
  22. mindsdb/integrations/utilities/sql_utils.py +11 -0
  23. mindsdb/interfaces/agents/agents_controller.py +2 -2
  24. mindsdb/interfaces/data_catalog/data_catalog_loader.py +18 -4
  25. mindsdb/interfaces/database/projects.py +1 -3
  26. mindsdb/interfaces/functions/controller.py +54 -64
  27. mindsdb/interfaces/functions/to_markdown.py +47 -14
  28. mindsdb/interfaces/knowledge_base/controller.py +134 -35
  29. mindsdb/interfaces/knowledge_base/evaluate.py +53 -10
  30. mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
  31. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
  32. mindsdb/utilities/config.py +46 -39
  33. mindsdb/utilities/exception.py +11 -0
  34. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/METADATA +236 -236
  35. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/RECORD +38 -36
  36. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/WHEEL +0 -0
  37. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/licenses/LICENSE +0 -0
  38. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/top_level.txt +0 -0

mindsdb/integrations/handlers/youtube_handler/youtube_tables.py

@@ -7,7 +7,7 @@ from mindsdb_sql_parser import ast
 from mindsdb.integrations.utilities.handlers.query_utilities import (
     SELECTQueryParser,
     SELECTQueryExecutor,
-    INSERTQueryParser
+    INSERTQueryParser,
 )

 import pandas as pd
@@ -66,9 +66,13 @@ class YoutubeCommentsTable(APITable):
         select_statement_executor = SELECTQueryExecutor(
             comments_df,
             selected_columns,
-            [where_condition for where_condition in where_conditions if where_condition[1] not in ['video_id', 'channel_id']],
+            [
+                where_condition
+                for where_condition in where_conditions
+                if where_condition[1] not in ["video_id", "channel_id"]
+            ],
             order_by_conditions,
-            result_limit if query.limit else None
+            result_limit if query.limit else None,
         )

         comments_df = select_statement_executor.execute_query()
@@ -98,50 +102,30 @@ class YoutubeCommentsTable(APITable):
         values_to_insert = insert_query_parser.parse_query()

         for value in values_to_insert:
-            if not value.get('comment_id'):
-                if not value.get('comment'):
+            if not value.get("comment_id"):
+                if not value.get("comment"):
                     raise ValueError("comment is mandatory for inserting a top-level comment.")
                 else:
-                    self.insert_comment(video_id=value['video_id'], text=value['comment'])
+                    self.insert_comment(video_id=value["video_id"], text=value["comment"])

             else:
-                if not value.get('reply'):
+                if not value.get("reply"):
                     raise ValueError("reply is mandatory for inserting a reply.")
                 else:
-                    self.insert_comment(comment_id=value['comment_id'], text=value['reply'])
+                    self.insert_comment(comment_id=value["comment_id"], text=value["reply"])

     def insert_comment(self, text, video_id: str = None, comment_id: str = None):
         # if comment_id is provided, define the request body for a reply and insert it
         if comment_id:
-            request_body = {
-                'snippet': {
-                    'parentId': comment_id,
-                    'textOriginal': text
-                }
-            }
+            request_body = {"snippet": {"parentId": comment_id, "textOriginal": text}}

-            self.handler.connect().comments().insert(
-                part='snippet',
-                body=request_body
-            ).execute()
+            self.handler.connect().comments().insert(part="snippet", body=request_body).execute()

         # else if video_id is provided, define the request body for a top-level comment and insert it
         elif video_id:
-            request_body = {
-                'snippet': {
-                    'topLevelComment': {
-                        'snippet': {
-                            'videoId': video_id,
-                            'textOriginal': text
-                        }
-                    }
-                }
-            }
+            request_body = {"snippet": {"topLevelComment": {"snippet": {"videoId": video_id, "textOriginal": text}}}}

-            self.handler.connect().commentThreads().insert(
-                part='snippet',
-                body=request_body
-            ).execute()
+            self.handler.connect().commentThreads().insert(part="snippet", body=request_body).execute()

     def get_columns(self) -> List[str]:
         """Gets all columns to be returned in pandas DataFrame responses
@@ -150,7 +134,19 @@ class YoutubeCommentsTable(APITable):
             List[str]
                 List of columns
         """
-        return ['comment_id', 'channel_id', 'video_id', 'user_id', 'display_name', 'comment', "published_at", "updated_at", 'reply_user_id', 'reply_author', 'reply']
+        return [
+            "comment_id",
+            "channel_id",
+            "video_id",
+            "user_id",
+            "display_name",
+            "comment",
+            "published_at",
+            "updated_at",
+            "reply_user_id",
+            "reply_author",
+            "reply",
+        ]

     def get_comments(self, video_id: str, channel_id: str):
         """Pulls all the records from the given youtube api end point and returns it select()
@@ -166,7 +162,12 @@ class YoutubeCommentsTable(APITable):
         resource = (
             self.handler.connect()
             .commentThreads()
-            .list(part="snippet, replies", videoId=video_id, allThreadsRelatedToChannelId=channel_id, textFormat="plainText")
+            .list(
+                part="snippet, replies",
+                videoId=video_id,
+                allThreadsRelatedToChannelId=channel_id,
+                textFormat="plainText",
+            )
         )

         data = []
@@ -175,7 +176,7 @@ class YoutubeCommentsTable(APITable):

         for comment in comments["items"]:
             replies = []
-            if 'replies' in comment:
+            if "replies" in comment:
                 for reply in comment["replies"]["comments"]:
                     replies.append(
                         {
@@ -222,18 +223,51 @@ class YoutubeCommentsTable(APITable):
             else:
                 break

-        youtube_comments_df = pd.json_normalize(data, 'replies', ['comment_id', 'channel_id', 'video_id', 'user_id', 'display_name', 'comment', "published_at", "updated_at"], record_prefix='replies.')
-        youtube_comments_df = youtube_comments_df.rename(columns={'replies.user_id': 'reply_user_id', 'replies.reply_author': 'reply_author', 'replies.reply': 'reply'})
+        youtube_comments_df = pd.json_normalize(
+            data,
+            "replies",
+            [
+                "comment_id",
+                "channel_id",
+                "video_id",
+                "user_id",
+                "display_name",
+                "comment",
+                "published_at",
+                "updated_at",
+            ],
+            record_prefix="replies.",
+        )
+        youtube_comments_df = youtube_comments_df.rename(
+            columns={
+                "replies.user_id": "reply_user_id",
+                "replies.reply_author": "reply_author",
+                "replies.reply": "reply",
+            }
+        )

         # check if DataFrame is empty
         if youtube_comments_df.empty:
             return youtube_comments_df
         else:
-            return youtube_comments_df[['comment_id', 'channel_id', 'video_id', 'user_id', 'display_name', 'comment', "published_at", "updated_at", 'reply_user_id', 'reply_author', 'reply']]
+            return youtube_comments_df[
+                [
+                    "comment_id",
+                    "channel_id",
+                    "video_id",
+                    "user_id",
+                    "display_name",
+                    "comment",
+                    "published_at",
+                    "updated_at",
+                    "reply_user_id",
+                    "reply_author",
+                    "reply",
+                ]
+            ]


 class YoutubeChannelsTable(APITable):
-
     """Youtube Channel Info by channel id Table implementation"""

     def select(self, query: ast.Select) -> pd.DataFrame:
@@ -263,9 +297,9 @@ class YoutubeChannelsTable(APITable):
         select_statement_executor = SELECTQueryExecutor(
             channel_df,
             selected_columns,
-            [where_condition for where_condition in where_conditions if where_condition[1] == 'channel_id'],
+            [where_condition for where_condition in where_conditions if where_condition[1] == "channel_id"],
             order_by_conditions,
-            result_limit if query.limit else None
+            result_limit if query.limit else None,
         )

         channel_df = select_statement_executor.execute_query()
@@ -304,7 +338,6 @@ class YoutubeChannelsTable(APITable):


 class YoutubeVideosTable(APITable):
-
     """Youtube Video info by video id Table implementation"""

     def select(self, query: ast.Select) -> pd.DataFrame:
@@ -317,7 +350,7 @@ class YoutubeVideosTable(APITable):
             result_limit,
         ) = select_statement_parser.parse_query()

-        video_id, channel_id = None, None
+        video_id, channel_id, search_query = None, None, None
         for op, arg1, arg2 in where_conditions:
             if arg1 == "video_id":
                 if op == "=":
@@ -331,38 +364,126 @@ class YoutubeVideosTable(APITable):
                 else:
                     raise NotImplementedError("Only '=' operator is supported for channel_id column.")

-        if not video_id and not channel_id:
-            raise ValueError("Either video_id or channel_id has to be present in where clause.")
+            elif arg1 == "query":
+                if op == "=":
+                    search_query = arg2
+                else:
+                    raise NotImplementedError("Only '=' operator is supported for query column.")
+
+        if not video_id and not channel_id and not search_query:
+            raise ValueError("At least one of video_id, channel_id, or query must be present in the WHERE clause.")

         if video_id:
             video_df = self.get_videos_by_video_ids([video_id])
+        elif channel_id and search_query:
+            video_df = self.get_videos_by_search_query_in_channel(search_query, channel_id, result_limit)
+        elif channel_id:
+            video_df = self.get_videos_by_channel_id(channel_id, result_limit)
         else:
-            video_df = self.get_videos_by_channel_id(channel_id)
+            video_df = self.get_videos_by_search_query(search_query, result_limit)

         select_statement_executor = SELECTQueryExecutor(
             video_df,
             selected_columns,
-            [where_condition for where_condition in where_conditions if where_condition[1] not in ['video_id', 'channel_id']],
+            [
+                where_condition
+                for where_condition in where_conditions
+                if where_condition[1] not in ["video_id", "channel_id", "query"]
+            ],
             order_by_conditions,
-            result_limit if query.limit else None
+            result_limit if query.limit else None,
         )

         video_df = select_statement_executor.execute_query()

         return video_df

-    def get_videos_by_channel_id(self, channel_id):
+    def get_videos_by_search_query(self, search_query, limit=10):
         video_ids = []
         resource = (
             self.handler.connect()
             .search()
-            .list(part="snippet", channelId=channel_id, type="video")
+            .list(part="snippet", q=search_query, type="video", maxResults=min(50, limit))
         )
-        while resource:
+        total_fetched = 0
+
+        while resource and total_fetched < limit:
+            response = resource.execute()
+            for item in response["items"]:
+                video_ids.append(item["id"]["videoId"])
+                total_fetched += 1
+                if total_fetched >= limit:
+                    break
+
+            if "nextPageToken" in response and total_fetched < limit:
+                resource = (
+                    self.handler.connect()
+                    .search()
+                    .list(
+                        part="snippet",
+                        q=search_query,
+                        type="video",
+                        maxResults=min(50, limit - total_fetched),
+                        pageToken=response["nextPageToken"],
+                    )
+                )
+            else:
+                break
+
+        return self.get_videos_by_video_ids(video_ids)
+
+    def get_videos_by_search_query_in_channel(self, search_query, channel_id, limit=10):
+        """Search for videos within a specific channel"""
+        video_ids = []
+        resource = (
+            self.handler.connect()
+            .search()
+            .list(part="snippet", q=search_query, channelId=channel_id, type="video", maxResults=min(50, limit))
+        )
+        total_fetched = 0
+
+        while resource and total_fetched < limit:
+            response = resource.execute()
+            for item in response["items"]:
+                video_ids.append(item["id"]["videoId"])
+                total_fetched += 1
+                if total_fetched >= limit:
+                    break
+
+            if "nextPageToken" in response and total_fetched < limit:
+                resource = (
+                    self.handler.connect()
+                    .search()
+                    .list(
+                        part="snippet",
+                        q=search_query,
+                        channelId=channel_id,
+                        type="video",
+                        maxResults=min(50, limit - total_fetched),
+                        pageToken=response["nextPageToken"],
+                    )
+                )
+            else:
+                break
+
+        return self.get_videos_by_video_ids(video_ids)
+
+    def get_videos_by_channel_id(self, channel_id, limit=10):
+        video_ids = []
+        resource = (
+            self.handler.connect()
+            .search()
+            .list(part="snippet", channelId=channel_id, type="video", maxResults=min(50, limit))
+        )
+        total_fetched = 0
+        while resource and total_fetched < limit:
             response = resource.execute()
             for item in response["items"]:
                 video_ids.append(item["id"]["videoId"])
-            if "nextPageToken" in response:
+                total_fetched += 1
+                if total_fetched >= limit:
+                    break
+            if "nextPageToken" in response and total_fetched < limit:
                 resource = (
                     self.handler.connect()
                     .search()
@@ -370,6 +491,7 @@ class YoutubeVideosTable(APITable):
                         part="snippet",
                         channelId=channel_id,
                         type="video",
+                        maxResults=min(50, limit - total_fetched),
                         pageToken=response["nextPageToken"],
                     )
                 )
@@ -388,7 +510,13 @@ class YoutubeVideosTable(APITable):
         # loop over 50 video ids at a time
         # an invalid request error is caused otherwise
         for i in range(0, len(video_ids), 50):
-            resource = self.handler.connect().videos().list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i:i + 50])).execute()
+            resource = (
+                self.handler.connect()
+                .videos()
+                .list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i : i + 50]))
+                .execute()
+            )
+
             for item in resource["items"]:
                 data.append(
                     {
@@ -415,7 +543,7 @@ class YoutubeVideosTable(APITable):
             return json_formatted_transcript

         except Exception as e:
-            logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),
+            (logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),)
             return "Transcript not available for this video"

     def parse_duration(self, video_id, duration):
@@ -428,7 +556,7 @@ class YoutubeVideosTable(APITable):

             return duration_str.strip(":")
         except Exception as e:
-            logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),
+            (logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),)
             return "Duration not available for this video"

     def get_columns(self) -> List[str]:
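
The net effect of these changes: the videos table now accepts a query column in the WHERE clause (alone or combined with channel_id), and all three fetch methods share the same bounded-pagination pattern: the YouTube Data API returns at most 50 results per page, so each call requests min(50, remaining) items and follows nextPageToken only while more rows are needed. A minimal standalone sketch of that pattern (fetch_page is a stand-in for the handler's search().list(...).execute() calls, not part of the handler):

# Sketch of the bounded-pagination loop used above (illustrative only).
from typing import Callable, List, Optional, Tuple

PAGE_SIZE = 50  # the YouTube Data API caps maxResults at 50 per request


def fetch_all(
    fetch_page: Callable[[int, Optional[str]], Tuple[List[str], Optional[str]]],
    limit: int = 10,
) -> List[str]:
    """Collect up to `limit` ids; each page requests only what is still needed."""
    ids: List[str] = []
    page_token: Optional[str] = None
    while len(ids) < limit:
        # fetch_page(page_size, page_token) -> (items, next_page_token)
        items, page_token = fetch_page(min(PAGE_SIZE, limit - len(ids)), page_token)
        ids.extend(items[: limit - len(ids)])
        if not items or not page_token:
            break
    return ids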

mindsdb/integrations/libs/keyword_search_base.py (new file)

@@ -0,0 +1,41 @@
+from mindsdb_sql_parser.ast import Select
+from typing import List
+import pandas as pd
+
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, KeywordSearchArgs
+
+
+class KeywordSearchBase:
+    """
+    Base class for keyword search integrations.
+    This class provides a common interface for keyword search functionality.
+    """
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def dispatch_keyword_select(
+        self, query: Select, conditions: List[FilterCondition] = None, keyword_search_args: KeywordSearchArgs = None
+    ):
+        """Dispatches a keyword search select query to the appropriate method."""
+        raise NotImplementedError()
+
+    def keyword_select(
+        self,
+        table_name: str,
+        columns: List[str] = None,
+        conditions: List[FilterCondition] = None,
+        offset: int = None,
+        limit: int = None,
+    ) -> pd.DataFrame:
+        """Select data from table
+
+        Args:
+            table_name (str): table name
+            columns (List[str]): columns to select
+            conditions (List[FilterCondition]): conditions to select
+
+        Returns:
+            HandlerResponse
+        """
+        raise NotImplementedError()
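
A handler opts into keyword search by subclassing KeywordSearchBase and overriding keyword_select. A hypothetical in-memory implementation, for illustration only: the DataFrame store is invented, and the extra keyword_search_args parameter mirrors how the vector-store dispatcher below invokes the method (it is not part of the base signature shown above).

import pandas as pd

from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase
from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs


class InMemoryKeywordSearch(KeywordSearchBase):
    """Hypothetical handler: keyword search over DataFrames held in memory."""

    def __init__(self, tables: dict):
        super().__init__()
        self._tables = tables  # table_name -> pd.DataFrame

    def keyword_select(self, table_name, columns=None, conditions=None,
                       offset=None, limit=None, keyword_search_args=None):
        df = self._tables[table_name]
        if keyword_search_args is not None:
            # Naive substring match; a real handler would push this down
            # to its engine's full-text search instead.
            mask = df[keyword_search_args.column].str.contains(
                keyword_search_args.query, case=False, na=False
            )
            df = df[mask]
        if columns:
            df = df[columns]
        if offset:
            df = df.iloc[offset:]
        if limit is not None:
            df = df.head(limit)
        return df


if __name__ == "__main__":
    tables = {"docs": pd.DataFrame({"id": [1, 2], "content": ["alpha beta", "gamma"]})}
    hits = InMemoryKeywordSearch(tables).keyword_select(
        "docs", keyword_search_args=KeywordSearchArgs(column="content", query="beta")
    )
    print(hits)  # only the row whose content contains "beta"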

mindsdb/integrations/libs/vectordatabase_handler.py

@@ -21,7 +21,7 @@ from mindsdb_sql_parser.ast.base import ASTNode

 from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse
 from mindsdb.utilities import log
-from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs

 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from .base import BaseHandler
@@ -372,44 +372,65 @@ class VectorStoreHandler(BaseHandler):
         return self.delete(table_name, conditions=conditions)

     def dispatch_select(
-        self, query: Select, conditions: List[FilterCondition] = None, allowed_metadata_columns: List[str] = None
+        self,
+        query: Select,
+        conditions: Optional[List[FilterCondition]] = None,
+        allowed_metadata_columns: List[str] = None,
+        keyword_search_args: Optional[KeywordSearchArgs] = None,
     ):
         """
-        Dispatch select query to the appropriate method.
+        Dispatches a select query to the appropriate method, handling both
+        standard selections and keyword searches based on the provided arguments.
         """
-        # parse key arguments
+        # 1. Parse common query arguments
         table_name = query.from_table.parts[-1]
-        # if targets are star, select all columns
+
+        # If targets are a star (*), select all schema columns
         if isinstance(query.targets[0], Star):
             columns = [col["name"] for col in self.SCHEMA]
         else:
             columns = [col.parts[-1] for col in query.targets]

+        # 2. Validate columns
         if not self._is_columns_allowed(columns):
-            raise Exception(f"Columns {columns} not allowed.Allowed columns are {[col['name'] for col in self.SCHEMA]}")
+            allowed_cols = [col["name"] for col in self.SCHEMA]
+            raise Exception(f"Columns {columns} not allowed. Allowed columns are {allowed_cols}")

-        # check if columns are allowed
+        # 3. Extract and process conditions
         if conditions is None:
             where_statement = query.where
             conditions = self.extract_conditions(where_statement)
         self._convert_metadata_filters(conditions, allowed_metadata_columns=allowed_metadata_columns)

-        # get offset and limit
+        # 4. Get offset and limit
         offset = query.offset.value if query.offset is not None else None
         limit = query.limit.value if query.limit is not None else None

-        # dispatch select
-        try:
-            return self.select(
+        # 5. Conditionally dispatch to the correct select method
+        if keyword_search_args:
+            # It's a keyword search
+            return self.keyword_select(
                 table_name,
                 columns=columns,
                 conditions=conditions,
                 offset=offset,
                 limit=limit,
+                keyword_search_args=keyword_search_args,
             )
-        except Exception as e:
-            handler_engine = self.__class__.name
-            raise VectorHandlerException(f"Error in {handler_engine} database: {e}")
+        else:
+            # It's a standard select
+            try:
+                return self.select(
+                    table_name,
+                    columns=columns,
+                    conditions=conditions,
+                    offset=offset,
+                    limit=limit,
+                )
+
+            except Exception as e:
+                handler_engine = self.__class__.name
+                raise VectorHandlerException(f"Error in {handler_engine} database: {e}")

     def _dispatch(self, query: ASTNode) -> HandlerResponse:
         """

mindsdb/integrations/utilities/sql_utils.py

@@ -60,6 +60,17 @@ class FilterCondition:
     """


+class KeywordSearchArgs:
+    def __init__(self, column: str, query: str):
+        """
+        Args:
+            column: The column to search in.
+            query: The search query string.
+        """
+        self.column = column
+        self.query = query
+
+
 class SortColumn:
     def __init__(self, column: str, ascending: bool = True):
         self.column = column

mindsdb/interfaces/agents/agents_controller.py

@@ -180,7 +180,7 @@ class AgentsController:
             agent (db.Agents): The created agent

         Raises:
-            ValueError: Agent with given name already exists, or skill/model with given name does not exist.
+            EntityExistsError: Agent with given name already exists, or skill/model with given name does not exist.
         """
         if project_name is None:
             project_name = default_project
@@ -189,7 +189,7 @@ class AgentsController:
         agent = self.get_agent(name, project_name)

         if agent is not None:
-            raise ValueError(f"Agent with name already exists: {name}")
+            raise EntityExistsError("Agent already exists", name)

         # No need to copy params since we're not preserving the original reference
         params = params or {}
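
For callers, duplicate agent names now surface as a typed exception instead of a bare ValueError. A minimal sketch, assuming EntityExistsError is imported from mindsdb.utilities.exception (the module this release also extends) and that the creating method is add_agent; both names are assumptions, not confirmed by the diff:

from mindsdb.utilities.exception import EntityExistsError  # import path assumed

try:
    agent = agents_controller.add_agent(name="support_bot", project_name="mindsdb")
except EntityExistsError:
    # previously raised as: ValueError(f"Agent with name already exists: {name}")
    agent = agents_controller.get_agent("support_bot", "mindsdb")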

mindsdb/interfaces/data_catalog/data_catalog_loader.py

@@ -1,7 +1,7 @@
 from typing import List, Union
-
 import pandas as pd
-
+import json
+import datetime
 from mindsdb.integrations.libs.response import RESPONSE_TYPE
 from mindsdb.interfaces.data_catalog.base_data_catalog import BaseDataCatalog
 from mindsdb.interfaces.storage import db
@@ -204,6 +204,8 @@ class DataCatalogLoader(BaseDataCatalog):
             # Convert the distinct_values_count to an integer if it is not NaN, otherwise set it to None.
             val = row.get("distinct_values_count")
             distinct_values_count = int(val) if pd.notna(val) else None
+            min_val = row.get("minimum_value")
+            max_val = row.get("maximum_value")

             # Convert the most_common_frequencies to a list of strings.
             most_common_frequencies = [str(val) for val in row.get("most_common_frequencies") or []]
@@ -214,8 +216,8 @@ class DataCatalogLoader(BaseDataCatalog):
                 most_common_frequencies=most_common_frequencies,
                 null_percentage=row.get("null_percentage"),
                 distinct_values_count=distinct_values_count,
-                minimum_value=row.get("minimum_value"),
-                maximum_value=row.get("maximum_value"),
+                minimum_value=self.to_str(min_val),
+                maximum_value=self.to_str(max_val),
             )
             column_statistics.append(record)

@@ -373,3 +375,15 @@ class DataCatalogLoader(BaseDataCatalog):
             db.session.delete(table)
         db.session.commit()
         self.logger.info(f"Metadata for {self.database_name} removed successfully.")
+
+    def to_str(self, val) -> str:
+        """
+        Convert a value to a string.
+        """
+        if val is None:
+            return None
+        if isinstance(val, (datetime.datetime, datetime.date)):
+            return val.isoformat()
+        if isinstance(val, (list, dict, set, tuple)):
+            return json.dumps(val, default=str)
+        return str(val)
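
to_str normalizes the heterogeneous minimum/maximum statistics into plain strings before they are persisted. Its behavior, demonstrated with a standalone copy of the method from the hunk above (sample values are illustrative):

import datetime
import json


def to_str(val) -> str:
    """Standalone copy of DataCatalogLoader.to_str from the hunk above."""
    if val is None:
        return None
    if isinstance(val, (datetime.datetime, datetime.date)):
        return val.isoformat()
    if isinstance(val, (list, dict, set, tuple)):
        return json.dumps(val, default=str)
    return str(val)


assert to_str(None) is None
assert to_str(datetime.date(2024, 1, 31)) == "2024-01-31"
assert to_str({"a": 1}) == '{"a": 1}'
assert to_str(42) == "42"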

mindsdb/interfaces/database/projects.py

@@ -362,9 +362,7 @@ class Project:

                 columns = [ASSISTANT_COLUMN, USER_COLUMN]
             case "KNOWLEDGE_BASE":
-                from mindsdb.interfaces.knowledge_base.controller import KB_TO_VECTORDB_COLUMNS
-
-                columns = list(KB_TO_VECTORDB_COLUMNS.keys()) + ["metadata", "relevance", "distance"]
+                columns = ["id", "chunk_id", "chunk_content", "metadata", "relevance", "distance"]
             case "TABLE":
                 # like 'mindsdb.models'
                 pass