MindsDB 25.7.1.0-py3-none-any.whl → 25.7.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB has been flagged; details are available on the registry page.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +54 -95
- mindsdb/api/a2a/agent.py +30 -206
- mindsdb/api/a2a/common/server/server.py +26 -27
- mindsdb/api/a2a/task_manager.py +93 -227
- mindsdb/api/a2a/utils.py +21 -0
- mindsdb/api/executor/command_executor.py +7 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
- mindsdb/api/executor/utilities/sql.py +97 -21
- mindsdb/api/http/namespaces/agents.py +127 -202
- mindsdb/api/http/namespaces/config.py +12 -1
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +11 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +94 -1
- mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +4 -3
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -3
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
- mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
- mindsdb/integrations/libs/keyword_search_base.py +41 -0
- mindsdb/integrations/libs/vectordatabase_handler.py +35 -14
- mindsdb/integrations/utilities/sql_utils.py +11 -0
- mindsdb/interfaces/agents/agents_controller.py +2 -2
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +18 -4
- mindsdb/interfaces/database/projects.py +1 -3
- mindsdb/interfaces/functions/controller.py +54 -64
- mindsdb/interfaces/functions/to_markdown.py +47 -14
- mindsdb/interfaces/knowledge_base/controller.py +134 -35
- mindsdb/interfaces/knowledge_base/evaluate.py +53 -10
- mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
- mindsdb/utilities/config.py +46 -39
- mindsdb/utilities/exception.py +11 -0
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/METADATA +236 -236
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/RECORD +38 -36
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/top_level.txt +0 -0
Diff hunks for a subset of the changed files:

mindsdb/integrations/handlers/youtube_handler/youtube_tables.py

@@ -7,7 +7,7 @@ from mindsdb_sql_parser import ast
 from mindsdb.integrations.utilities.handlers.query_utilities import (
     SELECTQueryParser,
     SELECTQueryExecutor,
-    INSERTQueryParser
+    INSERTQueryParser,
 )

 import pandas as pd
@@ -66,9 +66,13 @@ class YoutubeCommentsTable(APITable):
         select_statement_executor = SELECTQueryExecutor(
             comments_df,
             selected_columns,
-            [where_condition for where_condition in where_conditions if where_condition[1] not in ["video_id", "channel_id"]],
+            [
+                where_condition
+                for where_condition in where_conditions
+                if where_condition[1] not in ["video_id", "channel_id"]
+            ],
             order_by_conditions,
-            result_limit if query.limit else None
+            result_limit if query.limit else None,
         )

         comments_df = select_statement_executor.execute_query()
@@ -98,50 +102,30 @@ class YoutubeCommentsTable(APITable):
         values_to_insert = insert_query_parser.parse_query()

         for value in values_to_insert:
-            if not value.get('comment_id'):
-                if not value.get('comment'):
+            if not value.get("comment_id"):
+                if not value.get("comment"):
                     raise ValueError("comment is mandatory for inserting a top-level comment.")
                 else:
-                    self.insert_comment(video_id=value['video_id'], text=value['comment'])
+                    self.insert_comment(video_id=value["video_id"], text=value["comment"])

             else:
-                if not value.get('reply'):
+                if not value.get("reply"):
                     raise ValueError("reply is mandatory for inserting a reply.")
                 else:
-                    self.insert_comment(comment_id=value['comment_id'], text=value['reply'])
+                    self.insert_comment(comment_id=value["comment_id"], text=value["reply"])

     def insert_comment(self, text, video_id: str = None, comment_id: str = None):
         # if comment_id is provided, define the request body for a reply and insert it
         if comment_id:
-            request_body = {
-                'snippet': {
-                    'parentId': comment_id,
-                    'textOriginal': text
-                }
-            }
+            request_body = {"snippet": {"parentId": comment_id, "textOriginal": text}}

-            self.handler.connect().comments().insert(
-                part='snippet',
-                body=request_body
-            ).execute()
+            self.handler.connect().comments().insert(part="snippet", body=request_body).execute()

         # else if video_id is provided, define the request body for a top-level comment and insert it
         elif video_id:
-            request_body = {
-                'snippet': {
-                    'topLevelComment': {
-                        'snippet': {
-                            'videoId': video_id,
-                            'textOriginal': text
-                        }
-                    }
-                }
-            }
+            request_body = {"snippet": {"topLevelComment": {"snippet": {"videoId": video_id, "textOriginal": text}}}}

-            self.handler.connect().commentThreads().insert(
-                part='snippet',
-                body=request_body
-            ).execute()
+            self.handler.connect().commentThreads().insert(part="snippet", body=request_body).execute()

     def get_columns(self) -> List[str]:
         """Gets all columns to be returned in pandas DataFrame responses
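The collapsed dict literals above are the two YouTube Data API v3 payload shapes this table writes. A minimal sketch of the payload selection, taken straight from the hunk (the standalone function name is ours, for illustration):

    def build_request_body(text: str, video_id: str = None, comment_id: str = None) -> dict:
        if comment_id:
            # Reply to an existing comment: body for comments().insert
            return {"snippet": {"parentId": comment_id, "textOriginal": text}}
        if video_id:
            # New top-level comment: body for commentThreads().insert
            return {"snippet": {"topLevelComment": {"snippet": {"videoId": video_id, "textOriginal": text}}}}
        raise ValueError("Either comment_id or video_id is required.")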
@@ -150,7 +134,19 @@ class YoutubeCommentsTable(APITable):
         List[str]
             List of columns
         """
-        return ['comment_id', 'channel_id', 'video_id', 'user_id', 'display_name', 'comment', 'published_at', 'updated_at', 'reply_user_id', 'reply_author', 'reply']
+        return [
+            "comment_id",
+            "channel_id",
+            "video_id",
+            "user_id",
+            "display_name",
+            "comment",
+            "published_at",
+            "updated_at",
+            "reply_user_id",
+            "reply_author",
+            "reply",
+        ]

     def get_comments(self, video_id: str, channel_id: str):
         """Pulls all the records from the given youtube api end point and returns it select()
@@ -166,7 +162,12 @@ class YoutubeCommentsTable(APITable):
         resource = (
             self.handler.connect()
             .commentThreads()
-            .list(part='snippet, replies', videoId=video_id, allThreadsRelatedToChannelId=channel_id, textFormat='plainText')
+            .list(
+                part="snippet, replies",
+                videoId=video_id,
+                allThreadsRelatedToChannelId=channel_id,
+                textFormat="plainText",
+            )
         )

         data = []
@@ -175,7 +176,7 @@ class YoutubeCommentsTable(APITable):

         for comment in comments["items"]:
             replies = []
-            if 'replies' in comment:
+            if "replies" in comment:
                 for reply in comment["replies"]["comments"]:
                     replies.append(
                         {
@@ -222,18 +223,51 @@ class YoutubeCommentsTable(APITable):
             else:
                 break

-        youtube_comments_df = pd.json_normalize(
-            data, "replies", ["comment_id", "channel_id", "video_id", "user_id", "display_name", "comment", "published_at", "updated_at"], record_prefix="replies.").rename(columns={"replies.user_id": "reply_user_id", "replies.reply_author": "reply_author", "replies.reply": "reply"})
+        youtube_comments_df = pd.json_normalize(
+            data,
+            "replies",
+            [
+                "comment_id",
+                "channel_id",
+                "video_id",
+                "user_id",
+                "display_name",
+                "comment",
+                "published_at",
+                "updated_at",
+            ],
+            record_prefix="replies.",
+        )
+        youtube_comments_df = youtube_comments_df.rename(
+            columns={
+                "replies.user_id": "reply_user_id",
+                "replies.reply_author": "reply_author",
+                "replies.reply": "reply",
+            }
+        )

         # check if DataFrame is empty
         if youtube_comments_df.empty:
             return youtube_comments_df
         else:
-            return youtube_comments_df[["comment_id", "channel_id", "video_id", "user_id", "display_name", "comment", "published_at", "updated_at", "reply_user_id", "reply_author", "reply"]]
+            return youtube_comments_df[
+                [
+                    "comment_id",
+                    "channel_id",
+                    "video_id",
+                    "user_id",
+                    "display_name",
+                    "comment",
+                    "published_at",
+                    "updated_at",
+                    "reply_user_id",
+                    "reply_author",
+                    "reply",
+                ]
+            ]


 class YoutubeChannelsTable(APITable):
-
     """Youtube Channel Info by channel id Table implementation"""

     def select(self, query: ast.Select) -> pd.DataFrame:
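The pd.json_normalize call this hunk unfolds explodes one DataFrame row per reply while carrying the parent-comment fields along as metadata. A minimal, self-contained sketch of the same call on made-up data:

    import pandas as pd

    data = [
        {
            "comment_id": "c1",
            "channel_id": "ch1",
            "video_id": "v1",
            "user_id": "u1",
            "display_name": "alice",
            "comment": "nice video",
            "published_at": "2025-07-01T00:00:00Z",
            "updated_at": "2025-07-01T00:00:00Z",
            "replies": [{"user_id": "u2", "reply_author": "bob", "reply": "agreed"}],
        }
    ]

    df = pd.json_normalize(
        data,
        "replies",  # one output row per entry in each record's replies list
        ["comment_id", "channel_id", "video_id", "user_id", "display_name", "comment", "published_at", "updated_at"],
        record_prefix="replies.",  # reply fields come out as replies.user_id, replies.reply, ...
    ).rename(columns={"replies.user_id": "reply_user_id", "replies.reply_author": "reply_author", "replies.reply": "reply"})

Note that a comment whose replies list is empty contributes no rows to the normalized frame, which is presumably why the code checks youtube_comments_df.empty afterwards.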
@@ -263,9 +297,9 @@ class YoutubeChannelsTable(APITable):
         select_statement_executor = SELECTQueryExecutor(
             channel_df,
             selected_columns,
-            [where_condition for where_condition in where_conditions if where_condition[1] == 'channel_id'],
+            [where_condition for where_condition in where_conditions if where_condition[1] == "channel_id"],
             order_by_conditions,
-            result_limit if query.limit else None
+            result_limit if query.limit else None,
         )

         channel_df = select_statement_executor.execute_query()
@@ -304,7 +338,6 @@ class YoutubeChannelsTable(APITable):


 class YoutubeVideosTable(APITable):
-
     """Youtube Video info by video id Table implementation"""

     def select(self, query: ast.Select) -> pd.DataFrame:
@@ -317,7 +350,7 @@ class YoutubeVideosTable(APITable):
             result_limit,
         ) = select_statement_parser.parse_query()

-        video_id, channel_id = None, None
+        video_id, channel_id, search_query = None, None, None
         for op, arg1, arg2 in where_conditions:
             if arg1 == "video_id":
                 if op == "=":
@@ -331,38 +364,126 @@ class YoutubeVideosTable(APITable):
             else:
                 raise NotImplementedError("Only '=' operator is supported for channel_id column.")

-        if not video_id and not channel_id:
-            raise ValueError("Either video_id or channel_id must be present in the WHERE clause.")
+            elif arg1 == "query":
+                if op == "=":
+                    search_query = arg2
+                else:
+                    raise NotImplementedError("Only '=' operator is supported for query column.")
+
+        if not video_id and not channel_id and not search_query:
+            raise ValueError("At least one of video_id, channel_id, or query must be present in the WHERE clause.")

         if video_id:
             video_df = self.get_videos_by_video_ids([video_id])
+        elif channel_id and search_query:
+            video_df = self.get_videos_by_search_query_in_channel(search_query, channel_id, result_limit)
+        elif channel_id:
+            video_df = self.get_videos_by_channel_id(channel_id, result_limit)
         else:
-            video_df = self.get_videos_by_channel_id(channel_id, result_limit)
+            video_df = self.get_videos_by_search_query(search_query, result_limit)

         select_statement_executor = SELECTQueryExecutor(
             video_df,
             selected_columns,
-            [where_condition for where_condition in where_conditions if where_condition[1] not in ["video_id", "channel_id"]],
+            [
+                where_condition
+                for where_condition in where_conditions
+                if where_condition[1] not in ["video_id", "channel_id", "query"]
+            ],
             order_by_conditions,
-            result_limit if query.limit else None
+            result_limit if query.limit else None,
         )

         video_df = select_statement_executor.execute_query()

         return video_df

-    def get_videos_by_channel_id(self, channel_id, limit=10):
+    def get_videos_by_search_query(self, search_query, limit=10):
         video_ids = []
         resource = (
             self.handler.connect()
             .search()
-            .list(part="snippet", channelId=channel_id, type="video")
+            .list(part="snippet", q=search_query, type="video", maxResults=min(50, limit))
         )
-        while resource:
+        total_fetched = 0
+
+        while resource and total_fetched < limit:
+            response = resource.execute()
+            for item in response["items"]:
+                video_ids.append(item["id"]["videoId"])
+                total_fetched += 1
+                if total_fetched >= limit:
+                    break
+
+            if "nextPageToken" in response and total_fetched < limit:
+                resource = (
+                    self.handler.connect()
+                    .search()
+                    .list(
+                        part="snippet",
+                        q=search_query,
+                        type="video",
+                        maxResults=min(50, limit - total_fetched),
+                        pageToken=response["nextPageToken"],
+                    )
+                )
+            else:
+                break
+
+        return self.get_videos_by_video_ids(video_ids)
+
+    def get_videos_by_search_query_in_channel(self, search_query, channel_id, limit=10):
+        """Search for videos within a specific channel"""
+        video_ids = []
+        resource = (
+            self.handler.connect()
+            .search()
+            .list(part="snippet", q=search_query, channelId=channel_id, type="video", maxResults=min(50, limit))
+        )
+        total_fetched = 0
+
+        while resource and total_fetched < limit:
+            response = resource.execute()
+            for item in response["items"]:
+                video_ids.append(item["id"]["videoId"])
+                total_fetched += 1
+                if total_fetched >= limit:
+                    break
+
+            if "nextPageToken" in response and total_fetched < limit:
+                resource = (
+                    self.handler.connect()
+                    .search()
+                    .list(
+                        part="snippet",
+                        q=search_query,
+                        channelId=channel_id,
+                        type="video",
+                        maxResults=min(50, limit - total_fetched),
+                        pageToken=response["nextPageToken"],
+                    )
+                )
+            else:
+                break
+
+        return self.get_videos_by_video_ids(video_ids)
+
+    def get_videos_by_channel_id(self, channel_id, limit=10):
+        video_ids = []
+        resource = (
+            self.handler.connect()
+            .search()
+            .list(part="snippet", channelId=channel_id, type="video", maxResults=min(50, limit))
+        )
+        total_fetched = 0
+        while resource and total_fetched < limit:
             response = resource.execute()
             for item in response["items"]:
                 video_ids.append(item["id"]["videoId"])
-            if "nextPageToken" in response:
+                total_fetched += 1
+                if total_fetched >= limit:
+                    break
+            if "nextPageToken" in response and total_fetched < limit:
                 resource = (
                     self.handler.connect()
                     .search()
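All three get_videos_by_* methods repeat one pattern: page through search().list results, capping both the per-page maxResults (the YouTube API maximum is 50) and the running total. A distilled sketch of that loop, with search standing in for the handler's connect().search() resource and filters for the method-specific parameters:

    def fetch_video_ids(search, limit: int = 10, **filters) -> list:
        """Capped pagination over a googleapiclient-style search resource."""
        video_ids, total_fetched = [], 0
        request = search.list(part="snippet", type="video", maxResults=min(50, limit), **filters)
        while request and total_fetched < limit:
            response = request.execute()
            for item in response["items"]:
                video_ids.append(item["id"]["videoId"])
                total_fetched += 1
                if total_fetched >= limit:
                    break
            if "nextPageToken" in response and total_fetched < limit:
                # Request only as many results as are still missing.
                request = search.list(
                    part="snippet",
                    type="video",
                    maxResults=min(50, limit - total_fetched),
                    pageToken=response["nextPageToken"],
                    **filters,
                )
            else:
                break
        return video_ids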
@@ -370,6 +491,7 @@ class YoutubeVideosTable(APITable):
                         part="snippet",
                         channelId=channel_id,
                         type="video",
+                        maxResults=min(50, limit - total_fetched),
                         pageToken=response["nextPageToken"],
                     )
                 )
@@ -388,7 +510,13 @@ class YoutubeVideosTable(APITable):
         # loop over 50 video ids at a time
         # an invalid request error is caused otherwise
         for i in range(0, len(video_ids), 50):
-            resource = self.handler.connect().videos().list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i : i + 50])).execute()
+            resource = (
+                self.handler.connect()
+                .videos()
+                .list(part="statistics,snippet,contentDetails", id=",".join(video_ids[i : i + 50]))
+                .execute()
+            )
+
             for item in resource["items"]:
                 data.append(
                     {
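The comments in this hunk explain the chunking: videos().list rejects requests with more than 50 ids, so the loop walks the id list in 50-id slices. For example:

    video_ids = [f"vid{n}" for n in range(120)]  # made-up ids
    for i in range(0, len(video_ids), 50):
        batch = video_ids[i : i + 50]  # items 0-49, 50-99, 100-119
        id_param = ",".join(batch)     # comma-separated value for the id= parameter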
@@ -415,7 +543,7 @@ class YoutubeVideosTable(APITable):
             return json_formatted_transcript

         except Exception as e:
-            logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),
+            (logger.error(f"Encountered an error while fetching transcripts for video ${video_id}: ${e}"),)
             return "Transcript not available for this video"

     def parse_duration(self, video_id, duration):
@@ -428,7 +556,7 @@ class YoutubeVideosTable(APITable):

             return duration_str.strip(":")
         except Exception as e:
-            logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),
+            (logger.error(f"Encountered an error while parsing duration for video ${video_id}: ${e}"),)
             return "Duration not available for this video"

     def get_columns(self) -> List[str]:
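These two hunks are formatter output surfacing a pre-existing quirk rather than fixing it: the trailing comma after each logger.error(...) call makes the statement a one-element tuple expression, which ruff-format renders explicitly. A tiny illustration:

    import logging

    logger = logging.getLogger(__name__)

    # The trailing comma turns the statement into a one-element tuple expression;
    # the call still runs, and the resulting (None,) tuple is discarded.
    logger.error("something failed"),
    # ruff-format just makes the tuple explicit:
    (logger.error("something failed"),)

Dropping the comma would silence the no-op tuple; the JS-style ${...} placeholders inside the f-strings (which print a literal "$" before each value) also remain untouched.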
mindsdb/integrations/libs/keyword_search_base.py (new file)

@@ -0,0 +1,41 @@
+from mindsdb_sql_parser.ast import Select
+from typing import List
+import pandas as pd
+
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, KeywordSearchArgs
+
+
+class KeywordSearchBase:
+    """
+    Base class for keyword search integrations.
+    This class provides a common interface for keyword search functionality.
+    """
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def dispatch_keyword_select(
+        self, query: Select, conditions: List[FilterCondition] = None, keyword_search_args: KeywordSearchArgs = None
+    ):
+        """Dispatches a keyword search select query to the appropriate method."""
+        raise NotImplementedError()
+
+    def keyword_select(
+        self,
+        table_name: str,
+        columns: List[str] = None,
+        conditions: List[FilterCondition] = None,
+        offset: int = None,
+        limit: int = None,
+    ) -> pd.DataFrame:
+        """Select data from table
+
+        Args:
+            table_name (str): table name
+            columns (List[str]): columns to select
+            conditions (List[FilterCondition]): conditions to select
+
+        Returns:
+            HandlerResponse
+        """
+        raise NotImplementedError()
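Both methods of the new base class raise NotImplementedError, so integrations are expected to override them. A hypothetical subclass (the class name and body are ours, not part of the package) might look like:

    import pandas as pd

    from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase


    class DemoKeywordHandler(KeywordSearchBase):
        """Hypothetical integration satisfying the keyword-search interface."""

        def keyword_select(self, table_name, columns=None, conditions=None, offset=None, limit=None) -> pd.DataFrame:
            # A real handler would push the keyword match down to its storage
            # engine (e.g. a full-text index); this stub returns an empty frame.
            return pd.DataFrame(columns=columns or [])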
mindsdb/integrations/libs/vectordatabase_handler.py

@@ -21,7 +21,7 @@ from mindsdb_sql_parser.ast.base import ASTNode

 from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse
 from mindsdb.utilities import log
-from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs

 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from .base import BaseHandler
@@ -372,44 +372,65 @@ class VectorStoreHandler(BaseHandler):
         return self.delete(table_name, conditions=conditions)

     def dispatch_select(
-        self, query: Select, conditions: Optional[List[FilterCondition]] = None, allowed_metadata_columns: List[str] = None
+        self,
+        query: Select,
+        conditions: Optional[List[FilterCondition]] = None,
+        allowed_metadata_columns: List[str] = None,
+        keyword_search_args: Optional[KeywordSearchArgs] = None,
     ):
         """
-        Dispatch a select query to the vector database.
+        Dispatches a select query to the appropriate method, handling both
+        standard selections and keyword searches based on the provided arguments.
         """
-        # get table name
+        # 1. Parse common query arguments
         table_name = query.from_table.parts[-1]
-        # get columns
+
+        # If targets are a star (*), select all schema columns
         if isinstance(query.targets[0], Star):
             columns = [col["name"] for col in self.SCHEMA]
         else:
             columns = [col.parts[-1] for col in query.targets]

+        # 2. Validate columns
         if not self._is_columns_allowed(columns):
-            raise Exception(f"Columns {columns} not allowed. Allowed columns are {[col['name'] for col in self.SCHEMA]}")
+            allowed_cols = [col["name"] for col in self.SCHEMA]
+            raise Exception(f"Columns {columns} not allowed. Allowed columns are {allowed_cols}")

-        # get conditions
+        # 3. Extract and process conditions
         if conditions is None:
             where_statement = query.where
             conditions = self.extract_conditions(where_statement)
         self._convert_metadata_filters(conditions, allowed_metadata_columns=allowed_metadata_columns)

-        # get limit and offset
+        # 4. Get offset and limit
         offset = query.offset.value if query.offset is not None else None
         limit = query.limit.value if query.limit is not None else None

-        # dispatch select
-
-        return self.select(
+        # 5. Conditionally dispatch to the correct select method
+        if keyword_search_args:
+            # It's a keyword search
+            return self.keyword_select(
                 table_name,
                 columns=columns,
                 conditions=conditions,
                 offset=offset,
                 limit=limit,
+                keyword_search_args=keyword_search_args,
             )
-
-
-
+        else:
+            # It's a standard select
+            try:
+                return self.select(
+                    table_name,
+                    columns=columns,
+                    conditions=conditions,
+                    offset=offset,
+                    limit=limit,
+                )
+
+            except Exception as e:
+                handler_engine = self.__class__.name
+                raise VectorHandlerException(f"Error in {handler_engine} database: {e}")

     def _dispatch(self, query: ASTNode) -> HandlerResponse:
         """
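The routing rule in the rewritten dispatch_select is simply the presence of keyword_search_args. A short usage sketch (handler and query are stand-ins for a concrete VectorStoreHandler subclass and a parsed SELECT AST):

    from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs

    # Passing KeywordSearchArgs flips dispatch_select onto the keyword path.
    kw_args = KeywordSearchArgs(column="chunk_content", query="error timeout")

    # handler.dispatch_select(query, keyword_search_args=kw_args)  -> handler.keyword_select(...)
    # handler.dispatch_select(query)                                -> handler.select(...) inside try/except,
    #                                                                  re-raised as VectorHandlerException on failure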
mindsdb/integrations/utilities/sql_utils.py

@@ -60,6 +60,17 @@ class FilterCondition:
         """


+class KeywordSearchArgs:
+    def __init__(self, column: str, query: str):
+        """
+        Args:
+            column: The column to search in.
+            query: The search query string.
+        """
+        self.column = column
+        self.query = query
+
+
 class SortColumn:
     def __init__(self, column: str, ascending: bool = True):
         self.column = column
mindsdb/interfaces/agents/agents_controller.py

@@ -180,7 +180,7 @@ class AgentsController:
             agent (db.Agents): The created agent

         Raises:
-            ValueError: Agent with given name already exists, or skill/model with given name does not exist.
+            EntityExistsError: Agent with given name already exists, or skill/model with given name does not exist.
         """
         if project_name is None:
             project_name = default_project
@@ -189,7 +189,7 @@ class AgentsController:
         agent = self.get_agent(name, project_name)

         if agent is not None:
-            raise ValueError(f"Agent with name already exists: {name}")
+            raise EntityExistsError("Agent already exists", name)

         # No need to copy params since we're not preserving the original reference
         params = params or {}
mindsdb/interfaces/data_catalog/data_catalog_loader.py

@@ -1,7 +1,7 @@
 from typing import List, Union
-
 import pandas as pd
-
+import json
+import datetime
 from mindsdb.integrations.libs.response import RESPONSE_TYPE
 from mindsdb.interfaces.data_catalog.base_data_catalog import BaseDataCatalog
 from mindsdb.interfaces.storage import db
@@ -204,6 +204,8 @@ class DataCatalogLoader(BaseDataCatalog):
             # Convert the distinct_values_count to an integer if it is not NaN, otherwise set it to None.
             val = row.get("distinct_values_count")
             distinct_values_count = int(val) if pd.notna(val) else None
+            min_val = row.get("minimum_value")
+            max_val = row.get("maximum_value")

             # Convert the most_common_frequencies to a list of strings.
             most_common_frequencies = [str(val) for val in row.get("most_common_frequencies") or []]
@@ -214,8 +216,8 @@ class DataCatalogLoader(BaseDataCatalog):
                 most_common_frequencies=most_common_frequencies,
                 null_percentage=row.get("null_percentage"),
                 distinct_values_count=distinct_values_count,
-                minimum_value=row.get("minimum_value"),
-                maximum_value=row.get("maximum_value"),
+                minimum_value=self.to_str(min_val),
+                maximum_value=self.to_str(max_val),
             )
             column_statistics.append(record)

|
|
@@ -373,3 +375,15 @@ class DataCatalogLoader(BaseDataCatalog):
|
|
|
373
375
|
db.session.delete(table)
|
|
374
376
|
db.session.commit()
|
|
375
377
|
self.logger.info(f"Metadata for {self.database_name} removed successfully.")
|
|
378
|
+
|
|
379
|
+
def to_str(self, val) -> str:
|
|
380
|
+
"""
|
|
381
|
+
Convert a value to a string.
|
|
382
|
+
"""
|
|
383
|
+
if val is None:
|
|
384
|
+
return None
|
|
385
|
+
if isinstance(val, (datetime.datetime, datetime.date)):
|
|
386
|
+
return val.isoformat()
|
|
387
|
+
if isinstance(val, (list, dict, set, tuple)):
|
|
388
|
+
return json.dumps(val, default=str)
|
|
389
|
+
return str(val)
|
|
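A quick sketch of what the new to_str helper returns for the value types the min/max statistics can carry (loader stands in for a DataCatalogLoader instance; results shown as comments):

    import datetime

    loader.to_str(None)                       # None (passes through despite the -> str annotation)
    loader.to_str(datetime.date(2025, 7, 1))  # "2025-07-01"
    loader.to_str({"a": 1})                   # '{"a": 1}'
    loader.to_str((1, 2))                     # "[1, 2]" (tuples serialize as JSON arrays)
    loader.to_str(42)                         # "42"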
mindsdb/interfaces/database/projects.py

@@ -362,9 +362,7 @@ class Project:

                     columns = [ASSISTANT_COLUMN, USER_COLUMN]
                 case "KNOWLEDGE_BASE":
-                    from mindsdb.interfaces.knowledge_base.controller import KB_TO_VECTORDB_COLUMNS
-
-                    columns = list(KB_TO_VECTORDB_COLUMNS.keys()) + ["metadata", "relevance", "distance"]
+                    columns = ["id", "chunk_id", "chunk_content", "metadata", "relevance", "distance"]
                 case "TABLE":
                     # like 'mindsdb.models'
                     pass