MindsDB 25.2.4.0__py3-none-any.whl → 25.3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +16 -1
- mindsdb/api/executor/command_executor.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +6 -1
- mindsdb/api/executor/planner/query_planner.py +6 -2
- mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +21 -24
- mindsdb/api/http/gui.py +5 -4
- mindsdb/api/http/initialize.py +19 -19
- mindsdb/api/mongo/classes/query_sql.py +2 -1
- mindsdb/api/mongo/responders/aggregate.py +2 -2
- mindsdb/api/mongo/responders/coll_stats.py +3 -2
- mindsdb/api/mongo/responders/db_stats.py +2 -1
- mindsdb/api/mongo/responders/insert.py +4 -2
- mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +5 -4
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +2 -4
- mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
- mindsdb/integrations/handlers/gmail_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +19 -66
- mindsdb/integrations/handlers/gmail_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/google_calendar_handler/connection_args.py +15 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +31 -41
- mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/jira_handler/__init__.py +1 -0
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +22 -80
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +3 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +2 -1
- mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +2 -38
- mindsdb/integrations/libs/api_handler_generator.py +583 -0
- mindsdb/integrations/libs/llm/utils.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +29 -38
- mindsdb/integrations/utilities/pydantic_utils.py +208 -0
- mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +800 -135
- mindsdb/integrations/utilities/rag/settings.py +390 -152
- mindsdb/integrations/utilities/sql_utils.py +2 -1
- mindsdb/interfaces/agents/agents_controller.py +11 -7
- mindsdb/interfaces/agents/mindsdb_chat_model.py +4 -2
- mindsdb/interfaces/chatbot/chatbot_controller.py +9 -8
- mindsdb/interfaces/database/database.py +2 -1
- mindsdb/interfaces/database/projects.py +28 -2
- mindsdb/interfaces/jobs/jobs_controller.py +4 -1
- mindsdb/interfaces/model/model_controller.py +5 -2
- mindsdb/interfaces/skills/retrieval_tool.py +128 -39
- mindsdb/interfaces/skills/skill_tool.py +7 -7
- mindsdb/interfaces/skills/skills_controller.py +8 -4
- mindsdb/interfaces/storage/db.py +14 -0
- mindsdb/interfaces/storage/json.py +59 -0
- mindsdb/interfaces/storage/model_fs.py +85 -3
- mindsdb/interfaces/triggers/triggers_controller.py +2 -1
- mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +17 -3
- mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +29 -0
- mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +41 -0
- mindsdb/utilities/config.py +6 -2
- mindsdb/utilities/functions.py +11 -0
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/METADATA +219 -222
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/RECORD +61 -60
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/WHEEL +1 -1
- mindsdb/integrations/handlers/gmail_handler/utils.py +0 -45
- mindsdb/integrations/handlers/jira_handler/jira_table.py +0 -172
- mindsdb/integrations/handlers/jira_handler/requirements.txt +0 -1
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,7 @@ from mindsdb_sql_parser import ast
|
|
|
7
7
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
8
8
|
|
|
9
9
|
from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
10
|
+
from mindsdb.utilities.config import config
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class FilterOperator(Enum):
|
|
@@ -74,7 +75,7 @@ def make_sql_session():
|
|
|
74
75
|
from mindsdb.api.executor.controllers.session_controller import SessionController
|
|
75
76
|
|
|
76
77
|
sql_session = SessionController()
|
|
77
|
-
sql_session.database = '
|
|
78
|
+
sql_session.database = config.get('default_project')
|
|
78
79
|
return sql_session
|
|
79
80
|
|
|
80
81
|
|
|
@@ -13,12 +13,16 @@ from mindsdb.interfaces.database.projects import ProjectController
|
|
|
13
13
|
from mindsdb.interfaces.model.functions import PredictorRecordNotFound
|
|
14
14
|
from mindsdb.interfaces.model.model_controller import ModelController
|
|
15
15
|
from mindsdb.interfaces.skills.skills_controller import SkillsController
|
|
16
|
+
from mindsdb.utilities.config import config
|
|
16
17
|
from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
|
|
17
18
|
|
|
18
19
|
from .constants import ASSISTANT_COLUMN, SUPPORTED_PROVIDERS, PROVIDER_TO_MODELS
|
|
19
20
|
from .langchain_agent import get_llm_provider
|
|
20
21
|
|
|
21
22
|
|
|
23
|
+
default_project = config.get('default_project')
|
|
24
|
+
|
|
25
|
+
|
|
22
26
|
class AgentsController:
|
|
23
27
|
'''Handles CRUD operations at the database level for Agents'''
|
|
24
28
|
|
|
@@ -70,7 +74,7 @@ class AgentsController:
|
|
|
70
74
|
|
|
71
75
|
return model, provider
|
|
72
76
|
|
|
73
|
-
def get_agent(self, agent_name: str, project_name: str =
|
|
77
|
+
def get_agent(self, agent_name: str, project_name: str = default_project) -> Optional[db.Agents]:
|
|
74
78
|
'''
|
|
75
79
|
Gets an agent by name.
|
|
76
80
|
|
|
@@ -91,7 +95,7 @@ class AgentsController:
|
|
|
91
95
|
).first()
|
|
92
96
|
return agent
|
|
93
97
|
|
|
94
|
-
def get_agent_by_id(self, id: int, project_name: str =
|
|
98
|
+
def get_agent_by_id(self, id: int, project_name: str = default_project) -> db.Agents:
|
|
95
99
|
'''
|
|
96
100
|
Gets an agent by id.
|
|
97
101
|
|
|
@@ -162,7 +166,7 @@ class AgentsController:
|
|
|
162
166
|
ValueError: Agent with given name already exists, or skill/model with given name does not exist.
|
|
163
167
|
'''
|
|
164
168
|
if project_name is None:
|
|
165
|
-
project_name =
|
|
169
|
+
project_name = default_project
|
|
166
170
|
project = self.project_controller.get(name=project_name)
|
|
167
171
|
|
|
168
172
|
agent = self.get_agent(name, project_name)
|
|
@@ -208,7 +212,7 @@ class AgentsController:
|
|
|
208
212
|
def update_agent(
|
|
209
213
|
self,
|
|
210
214
|
agent_name: str,
|
|
211
|
-
project_name: str =
|
|
215
|
+
project_name: str = default_project,
|
|
212
216
|
name: str = None,
|
|
213
217
|
model_name: str = None,
|
|
214
218
|
skills_to_add: List[Union[str, dict]] = None,
|
|
@@ -347,7 +351,7 @@ class AgentsController:
|
|
|
347
351
|
|
|
348
352
|
return existing_agent
|
|
349
353
|
|
|
350
|
-
def delete_agent(self, agent_name: str, project_name: str =
|
|
354
|
+
def delete_agent(self, agent_name: str, project_name: str = default_project):
|
|
351
355
|
'''
|
|
352
356
|
Deletes an agent by name.
|
|
353
357
|
|
|
@@ -371,7 +375,7 @@ class AgentsController:
|
|
|
371
375
|
self,
|
|
372
376
|
agent: db.Agents,
|
|
373
377
|
messages: List[Dict[str, str]],
|
|
374
|
-
project_name: str =
|
|
378
|
+
project_name: str = default_project,
|
|
375
379
|
tools: List[BaseTool] = None,
|
|
376
380
|
stream: bool = False) -> Union[Iterator[object], pd.DataFrame]:
|
|
377
381
|
"""
|
|
@@ -412,7 +416,7 @@ class AgentsController:
|
|
|
412
416
|
self,
|
|
413
417
|
agent: db.Agents,
|
|
414
418
|
messages: List[Dict[str, str]],
|
|
415
|
-
project_name: str =
|
|
419
|
+
project_name: str = default_project,
|
|
416
420
|
tools: List[BaseTool] = None) -> Iterator[object]:
|
|
417
421
|
'''
|
|
418
422
|
Queries an agent to get a stream of completion chunks.
|
|
@@ -31,8 +31,10 @@ from langchain_core.outputs import (
|
|
|
31
31
|
from pydantic import model_validator
|
|
32
32
|
|
|
33
33
|
from mindsdb.interfaces.agents.constants import USER_COLUMN
|
|
34
|
+
from mindsdb.utilities.config import config
|
|
34
35
|
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
37
|
+
default_project = config.get('default_project')
|
|
36
38
|
|
|
37
39
|
|
|
38
40
|
def _convert_message_to_dict(message: BaseMessage) -> dict:
|
|
@@ -63,7 +65,7 @@ class ChatMindsdb(BaseChatModel):
|
|
|
63
65
|
"""A chat model that uses the Mindsdb"""
|
|
64
66
|
|
|
65
67
|
model_name: str
|
|
66
|
-
project_name: Optional[str] =
|
|
68
|
+
project_name: Optional[str] = default_project
|
|
67
69
|
model_info: Optional[dict] = None
|
|
68
70
|
project_datanode: Optional[Any] = None
|
|
69
71
|
|
|
@@ -139,7 +141,7 @@ class ChatMindsdb(BaseChatModel):
|
|
|
139
141
|
from mindsdb.api.executor.controllers import SessionController
|
|
140
142
|
|
|
141
143
|
session = SessionController()
|
|
142
|
-
session.database =
|
|
144
|
+
session.database = default_project
|
|
143
145
|
|
|
144
146
|
values['model_info'] = session.model_controller.get_model(model_name, project_name=project_name)
|
|
145
147
|
|
|
@@ -9,7 +9,10 @@ from mindsdb.interfaces.model.functions import get_project_records
|
|
|
9
9
|
from mindsdb.utilities.context import context as ctx
|
|
10
10
|
|
|
11
11
|
from mindsdb.api.executor.controllers.session_controller import SessionController
|
|
12
|
-
from mindsdb.utilities.config import
|
|
12
|
+
from mindsdb.utilities.config import config
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
default_project = config.get('default_project')
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
class ChatBotController:
|
|
@@ -25,7 +28,7 @@ class ChatBotController:
|
|
|
25
28
|
self.project_controller = project_controller
|
|
26
29
|
self.agents_controller = agents_controller
|
|
27
30
|
|
|
28
|
-
def get_chatbot(self, chatbot_name: str, project_name: str =
|
|
31
|
+
def get_chatbot(self, chatbot_name: str, project_name: str = default_project) -> dict:
|
|
29
32
|
'''
|
|
30
33
|
Gets a chatbot by name.
|
|
31
34
|
|
|
@@ -118,7 +121,7 @@ class ChatBotController:
|
|
|
118
121
|
|
|
119
122
|
return bot_obj
|
|
120
123
|
|
|
121
|
-
def get_chatbots(self, project_name: str =
|
|
124
|
+
def get_chatbots(self, project_name: str = default_project) -> List[dict]:
|
|
122
125
|
'''
|
|
123
126
|
Gets all chatbots in a project.
|
|
124
127
|
|
|
@@ -199,14 +202,12 @@ class ChatBotController:
|
|
|
199
202
|
bot (db.ChatBots): The created chatbot
|
|
200
203
|
'''
|
|
201
204
|
|
|
202
|
-
config = Config()
|
|
203
|
-
|
|
204
205
|
is_cloud = config.get('cloud', False)
|
|
205
206
|
if is_cloud and ctx.user_class == 0:
|
|
206
207
|
raise Exception("You can't create chatbot")
|
|
207
208
|
|
|
208
209
|
if project_name is None:
|
|
209
|
-
project_name =
|
|
210
|
+
project_name = default_project
|
|
210
211
|
project = self.project_controller.get(name=project_name)
|
|
211
212
|
|
|
212
213
|
bot = self.get_chatbot(name, project_name)
|
|
@@ -260,7 +261,7 @@ class ChatBotController:
|
|
|
260
261
|
def update_chatbot(
|
|
261
262
|
self,
|
|
262
263
|
chatbot_name: str,
|
|
263
|
-
project_name: str =
|
|
264
|
+
project_name: str = default_project,
|
|
264
265
|
name: str = None,
|
|
265
266
|
model_name: str = None,
|
|
266
267
|
agent_name: str = None,
|
|
@@ -338,7 +339,7 @@ class ChatBotController:
|
|
|
338
339
|
|
|
339
340
|
return existing_chatbot_rec
|
|
340
341
|
|
|
341
|
-
def delete_chatbot(self, chatbot_name: str, project_name: str =
|
|
342
|
+
def delete_chatbot(self, chatbot_name: str, project_name: str = default_project):
|
|
342
343
|
'''
|
|
343
344
|
Deletes a chatbot by name.
|
|
344
345
|
|
|
@@ -3,6 +3,7 @@ from collections import OrderedDict
|
|
|
3
3
|
|
|
4
4
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
5
5
|
import mindsdb.utilities.profiler as profiler
|
|
6
|
+
from mindsdb.utilities.config import config
|
|
6
7
|
from mindsdb.utilities.exception import EntityNotExistsError
|
|
7
8
|
from mindsdb.interfaces.database.log import LogDBController
|
|
8
9
|
|
|
@@ -58,7 +59,7 @@ class DatabaseController:
|
|
|
58
59
|
'id': x.id,
|
|
59
60
|
'engine': None,
|
|
60
61
|
'visible': True,
|
|
61
|
-
'deletable': x.name.lower() != '
|
|
62
|
+
'deletable': x.name.lower() != config.get('default_project')
|
|
62
63
|
})
|
|
63
64
|
for key, value in integrations.items():
|
|
64
65
|
db_type = value.get('type', 'data')
|
|
@@ -4,6 +4,7 @@ from typing import List, Optional
|
|
|
4
4
|
from collections import OrderedDict
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sa
|
|
7
|
+
from sqlalchemy.orm.attributes import flag_modified
|
|
7
8
|
import numpy as np
|
|
8
9
|
|
|
9
10
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
@@ -30,6 +31,7 @@ class Project:
|
|
|
30
31
|
p.name = db_record.name
|
|
31
32
|
p.company_id = ctx.company_id
|
|
32
33
|
p.id = db_record.id
|
|
34
|
+
p.metadata = db_record.metadata_
|
|
33
35
|
return p
|
|
34
36
|
|
|
35
37
|
def create(self, name: str):
|
|
@@ -405,9 +407,9 @@ class ProjectController:
|
|
|
405
407
|
|
|
406
408
|
return [Project.from_record(x) for x in records]
|
|
407
409
|
|
|
408
|
-
def get(self, id: Optional[int] = None, name: Optional[str] = None, deleted: bool = False) -> Project:
|
|
410
|
+
def get(self, id: Optional[int] = None, name: Optional[str] = None, deleted: bool = False, is_default: bool = False) -> Project:
|
|
409
411
|
if id is not None and name is not None:
|
|
410
|
-
raise ValueError("Both 'id' and 'name'
|
|
412
|
+
raise ValueError("Both 'id' and 'name' can't be provided at the same time")
|
|
411
413
|
|
|
412
414
|
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
413
415
|
q = db.Project.query.filter_by(company_id=company_id)
|
|
@@ -424,6 +426,9 @@ class ProjectController:
|
|
|
424
426
|
else:
|
|
425
427
|
q = q.filter_by(deleted_at=sa.null())
|
|
426
428
|
|
|
429
|
+
if is_default:
|
|
430
|
+
q = q.filter(db.Project.metadata_['is_default'].as_boolean() == is_default)
|
|
431
|
+
|
|
427
432
|
record = q.first()
|
|
428
433
|
|
|
429
434
|
if record is None:
|
|
@@ -434,3 +439,24 @@ class ProjectController:
|
|
|
434
439
|
project = Project()
|
|
435
440
|
project.create(name=name)
|
|
436
441
|
return project
|
|
442
|
+
|
|
443
|
+
def update(self, id: Optional[int] = None, name: Optional[str] = None, new_name: str = None, new_metadata: dict = None) -> Project:
|
|
444
|
+
if id is not None and name is not None:
|
|
445
|
+
raise ValueError("Both 'id' and 'name' can't be provided at the same time")
|
|
446
|
+
|
|
447
|
+
if id is not None:
|
|
448
|
+
project = self.get(id=id)
|
|
449
|
+
else:
|
|
450
|
+
project = self.get(name=name)
|
|
451
|
+
|
|
452
|
+
if new_name is not None:
|
|
453
|
+
project.name = new_name
|
|
454
|
+
project.record.name = new_name
|
|
455
|
+
|
|
456
|
+
if new_metadata is not None:
|
|
457
|
+
project.metadata = new_metadata
|
|
458
|
+
project.record.metadata = new_metadata
|
|
459
|
+
flag_modified(project.record, 'metadata_')
|
|
460
|
+
|
|
461
|
+
db.session.commit()
|
|
462
|
+
return project
|
|
@@ -9,6 +9,7 @@ from mindsdb_sql_parser import parse_sql, ParsingException
|
|
|
9
9
|
from mindsdb_sql_parser.ast.mindsdb import CreateJob
|
|
10
10
|
from mindsdb_sql_parser.ast import Select, Star, Identifier, BinaryOperation, Constant
|
|
11
11
|
|
|
12
|
+
from mindsdb.utilities.config import config
|
|
12
13
|
from mindsdb.utilities.context import context as ctx
|
|
13
14
|
from mindsdb.utilities.exception import EntityNotExistsError, EntityExistsError
|
|
14
15
|
from mindsdb.interfaces.storage import db
|
|
@@ -20,6 +21,8 @@ from mindsdb.utilities import log
|
|
|
20
21
|
|
|
21
22
|
logger = log.getLogger(__name__)
|
|
22
23
|
|
|
24
|
+
default_project = config.get('default_project')
|
|
25
|
+
|
|
23
26
|
|
|
24
27
|
def split_sql(sql):
|
|
25
28
|
# split sql by ';' ignoring delimiter in quotes
|
|
@@ -199,7 +202,7 @@ class JobsController:
|
|
|
199
202
|
"""
|
|
200
203
|
|
|
201
204
|
if project_name is None:
|
|
202
|
-
project_name =
|
|
205
|
+
project_name = default_project
|
|
203
206
|
|
|
204
207
|
start_at = None
|
|
205
208
|
if query.start_str is not None:
|
|
@@ -19,6 +19,7 @@ from mindsdb.interfaces.model.functions import (
|
|
|
19
19
|
)
|
|
20
20
|
from mindsdb.interfaces.storage.json import get_json_storage
|
|
21
21
|
from mindsdb.interfaces.storage.model_fs import ModelStorage
|
|
22
|
+
from mindsdb.utilities.config import config
|
|
22
23
|
from mindsdb.utilities.context import context as ctx
|
|
23
24
|
from mindsdb.utilities.functions import resolve_model_identifier
|
|
24
25
|
import mindsdb.utilities.profiler as profiler
|
|
@@ -29,6 +30,8 @@ logger = log.getLogger(__name__)
|
|
|
29
30
|
|
|
30
31
|
IS_PY36 = sys.version_info[1] <= 6
|
|
31
32
|
|
|
33
|
+
default_project = config.get('default_project')
|
|
34
|
+
|
|
32
35
|
|
|
33
36
|
def delete_model_storage(model_id, ctx_dump):
|
|
34
37
|
try:
|
|
@@ -149,7 +152,7 @@ class ModelController():
|
|
|
149
152
|
models.append(model_data)
|
|
150
153
|
return models
|
|
151
154
|
|
|
152
|
-
def delete_model(self, model_name: str, project_name: str =
|
|
155
|
+
def delete_model(self, model_name: str, project_name: str = default_project, version=None):
|
|
153
156
|
from mindsdb.interfaces.database.database import DatabaseController
|
|
154
157
|
|
|
155
158
|
project_record = get_project_record(func.lower(project_name))
|
|
@@ -344,7 +347,7 @@ class ModelController():
|
|
|
344
347
|
def prepare_finetune_statement(self, statement, database_controller):
|
|
345
348
|
project_name, model_name, model_version = resolve_model_identifier(statement.name)
|
|
346
349
|
if project_name is None:
|
|
347
|
-
project_name =
|
|
350
|
+
project_name = default_project
|
|
348
351
|
data_integration_ref, fetch_data_query = self._get_data_integration_ref(statement, database_controller)
|
|
349
352
|
|
|
350
353
|
set_active = True
|
|
@@ -1,33 +1,24 @@
|
|
|
1
|
+
import traceback
|
|
2
|
+
|
|
1
3
|
from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
|
|
2
4
|
from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
|
|
5
|
+
from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
|
|
6
|
+
from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
|
|
3
7
|
|
|
4
8
|
from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
|
|
5
9
|
from mindsdb.interfaces.skills.skill_tool import skill_tool
|
|
6
10
|
from mindsdb.interfaces.storage import db
|
|
7
11
|
from mindsdb.interfaces.storage.db import KnowledgeBase
|
|
8
12
|
from mindsdb.utilities import log
|
|
13
|
+
from langchain_core.documents import Document
|
|
9
14
|
from langchain_core.tools import Tool
|
|
15
|
+
from mindsdb.integrations.libs.response import RESPONSE_TYPE
|
|
10
16
|
from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
|
|
11
17
|
|
|
12
18
|
logger = log.getLogger(__name__)
|
|
13
19
|
|
|
14
20
|
|
|
15
|
-
def
|
|
16
|
-
"""
|
|
17
|
-
Builds a retrieval tool i.e RAG
|
|
18
|
-
|
|
19
|
-
Args:
|
|
20
|
-
tool: Tool configuration dictionary
|
|
21
|
-
pred_args: Predictor arguments dictionary
|
|
22
|
-
skill: Skills database object
|
|
23
|
-
|
|
24
|
-
Returns:
|
|
25
|
-
Tool: Configured retrieval tool
|
|
26
|
-
|
|
27
|
-
Raises:
|
|
28
|
-
ValueError: If knowledge base is not found or configuration is invalid
|
|
29
|
-
"""
|
|
30
|
-
# build RAG config
|
|
21
|
+
def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
|
|
31
22
|
tools_config = tool['config']
|
|
32
23
|
tools_config.update(pred_args)
|
|
33
24
|
|
|
@@ -71,34 +62,132 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
|
71
62
|
logger.debug("Using default embedding model as no knowledge base provided")
|
|
72
63
|
|
|
73
64
|
# Load and validate config
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
65
|
+
return load_rag_config(tools_config, kb_params, embeddings_model)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
69
|
+
rag_config = _load_rag_config(tool, pred_args, skill)
|
|
70
|
+
# build retriever
|
|
71
|
+
rag_pipeline = RAG(rag_config)
|
|
72
|
+
logger.debug(f"RAG pipeline created with config: {rag_config}")
|
|
73
|
+
|
|
74
|
+
def rag_wrapper(query: str) -> str:
|
|
75
|
+
try:
|
|
76
|
+
result = rag_pipeline(query)
|
|
77
|
+
logger.debug(f"RAG pipeline result: {result}")
|
|
78
|
+
return result['answer']
|
|
79
|
+
except Exception as e:
|
|
80
|
+
logger.error(f"Error in RAG pipeline: {str(e)}")
|
|
81
|
+
logger.error(traceback.format_exc())
|
|
82
|
+
return f"Error in retrieval: {str(e)}"
|
|
83
|
+
|
|
84
|
+
# Create RAG tool
|
|
85
|
+
tools_config = tool['config']
|
|
86
|
+
tools_config.update(pred_args)
|
|
87
|
+
return Tool(
|
|
88
|
+
func=rag_wrapper,
|
|
89
|
+
name=tool['name'],
|
|
90
|
+
description=tool['description'],
|
|
91
|
+
response_format='content',
|
|
92
|
+
# Return directly by default since we already use an LLM against retrieved context to generate a response.
|
|
93
|
+
return_direct=tools_config.get('return_direct', True)
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
98
|
+
if 'source' not in tool:
|
|
99
|
+
raise ValueError("Knowledge base for tool not found")
|
|
100
|
+
kb_name = tool['source']
|
|
101
|
+
executor = skill_tool.get_command_executor()
|
|
102
|
+
kb = _get_knowledge_base(kb_name, skill.project_id, executor)
|
|
103
|
+
if not kb:
|
|
104
|
+
raise ValueError(f"Knowledge base not found: {kb_name}")
|
|
105
|
+
kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
|
|
106
|
+
vector_db_handler = kb_table.get_vector_db()
|
|
107
|
+
|
|
108
|
+
rag_config = _load_rag_config(tool, pred_args, skill)
|
|
109
|
+
metadata_config = rag_config.metadata_config
|
|
110
|
+
|
|
111
|
+
def _get_document_by_name(name: str):
|
|
112
|
+
if metadata_config.name_column_index is not None:
|
|
113
|
+
tsquery_str = ' & '.join(name.split(' '))
|
|
114
|
+
documents_response = vector_db_handler.native_query(
|
|
115
|
+
f'SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery(\'{tsquery_str}\') LIMIT 1;'
|
|
116
|
+
)
|
|
117
|
+
else:
|
|
118
|
+
documents_response = vector_db_handler.native_query(
|
|
119
|
+
f'SELECT * FROM {metadata_config.table} WHERE "{metadata_config.name_column}" ILIKE \'%{name}%\' LIMIT 1;'
|
|
120
|
+
)
|
|
121
|
+
if documents_response.resp_type == RESPONSE_TYPE.ERROR:
|
|
122
|
+
raise RuntimeError(f'There was an error looking up documents: {documents_response.error_message}')
|
|
123
|
+
if documents_response.data_frame.empty:
|
|
124
|
+
return None
|
|
125
|
+
document_row = documents_response.data_frame.head(1)
|
|
126
|
+
# Restore document from chunks, keeping in mind max context.
|
|
127
|
+
id_filter_condition = FilterCondition(
|
|
128
|
+
f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
|
|
129
|
+
FilterOperator.EQUAL,
|
|
130
|
+
str(document_row.get(metadata_config.id_column).item())
|
|
131
|
+
)
|
|
132
|
+
document_chunks_df = vector_db_handler.select(
|
|
133
|
+
metadata_config.embeddings_table,
|
|
134
|
+
conditions=[id_filter_condition]
|
|
97
135
|
)
|
|
136
|
+
if document_chunks_df.empty:
|
|
137
|
+
return None
|
|
138
|
+
sort_col = 'chunk_id' if 'chunk_id' in document_chunks_df.columns else 'id'
|
|
139
|
+
document_chunks_df.sort_values(by=sort_col)
|
|
140
|
+
content = ''
|
|
141
|
+
for _, chunk in document_chunks_df.iterrows():
|
|
142
|
+
if len(content) > metadata_config.max_document_context:
|
|
143
|
+
break
|
|
144
|
+
content += chunk.get(metadata_config.content_column, '')
|
|
145
|
+
|
|
146
|
+
return Document(
|
|
147
|
+
page_content=content,
|
|
148
|
+
metadata=document_row.to_dict(orient='records')[0]
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
def _lookup_document_by_name(name: str):
|
|
152
|
+
found_document = _get_document_by_name(name)
|
|
153
|
+
if found_document is None:
|
|
154
|
+
return f'I could not find any document with name {name}. Please make sure the document name matches exactly.'
|
|
155
|
+
return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"
|
|
156
|
+
|
|
157
|
+
return Tool(
|
|
158
|
+
func=_lookup_document_by_name,
|
|
159
|
+
name=tool.get('name', '') + '_name_lookup',
|
|
160
|
+
description='You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.',
|
|
161
|
+
return_direct=False
|
|
162
|
+
)
|
|
98
163
|
|
|
164
|
+
|
|
165
|
+
def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills):
|
|
166
|
+
"""
|
|
167
|
+
Builds a list of tools for retrieval i.e RAG
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
tool: Tool configuration dictionary
|
|
171
|
+
pred_args: Predictor arguments dictionary
|
|
172
|
+
skill: Skills database object
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
Tool: Configured list of retrieval tools
|
|
176
|
+
|
|
177
|
+
Raises:
|
|
178
|
+
ValueError: If knowledge base is not found or configuration is invalid
|
|
179
|
+
"""
|
|
180
|
+
# Catch configuration errors before creating tools.
|
|
181
|
+
try:
|
|
182
|
+
rag_config = _load_rag_config(tool, pred_args, skill)
|
|
99
183
|
except Exception as e:
|
|
100
184
|
logger.error(f"Error building RAG pipeline: {str(e)}")
|
|
101
185
|
raise ValueError(f"Failed to build RAG pipeline: {str(e)}")
|
|
186
|
+
tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
|
|
187
|
+
if rag_config.metadata_config is None:
|
|
188
|
+
return tools
|
|
189
|
+
tools.append(_build_name_lookup_tool(tool, pred_args, skill))
|
|
190
|
+
return tools
|
|
102
191
|
|
|
103
192
|
|
|
104
193
|
def _get_knowledge_base(knowledge_base_name: str, project_id, executor) -> KnowledgeBase:
|
|
@@ -10,6 +10,7 @@ from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant
|
|
|
10
10
|
|
|
11
11
|
from mindsdb.utilities import log
|
|
12
12
|
from mindsdb.utilities.cache import get_cache
|
|
13
|
+
from mindsdb.utilities.config import config
|
|
13
14
|
from mindsdb.interfaces.storage import db
|
|
14
15
|
from mindsdb.interfaces.skills.sql_agent import SQLAgent
|
|
15
16
|
from mindsdb.integrations.libs.vectordatabase_handler import TableField
|
|
@@ -106,7 +107,7 @@ class SkillToolController:
|
|
|
106
107
|
from mindsdb.api.executor.controllers import SessionController # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304) # noqa
|
|
107
108
|
|
|
108
109
|
sql_session = SessionController()
|
|
109
|
-
sql_session.database = '
|
|
110
|
+
sql_session.database = config.get('default_project')
|
|
110
111
|
|
|
111
112
|
self.command_executor = ExecuteCommands(sql_session)
|
|
112
113
|
return self.command_executor
|
|
@@ -222,8 +223,8 @@ class SkillToolController:
|
|
|
222
223
|
pred_args = {}
|
|
223
224
|
pred_args['llm'] = llm
|
|
224
225
|
|
|
225
|
-
from .retrieval_tool import
|
|
226
|
-
return
|
|
226
|
+
from .retrieval_tool import build_retrieval_tools
|
|
227
|
+
return build_retrieval_tools(tool, pred_args, skill)
|
|
227
228
|
|
|
228
229
|
def _get_rag_query_function(self, skill: db.Skills):
|
|
229
230
|
session_controller = self.get_command_executor().session
|
|
@@ -295,10 +296,9 @@ class SkillToolController:
|
|
|
295
296
|
for skill in skills
|
|
296
297
|
]
|
|
297
298
|
elif skill_type == SkillType.RETRIEVAL:
|
|
298
|
-
tools[skill_type] = [
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
]
|
|
299
|
+
tools[skill_type] = []
|
|
300
|
+
for skill in skills:
|
|
301
|
+
tools[skill_type] += self._make_retrieval_tools(skill, llm, embedding_model)
|
|
302
302
|
return tools
|
|
303
303
|
|
|
304
304
|
|
|
@@ -6,6 +6,10 @@ from sqlalchemy.orm.attributes import flag_modified
|
|
|
6
6
|
|
|
7
7
|
from mindsdb.interfaces.storage import db
|
|
8
8
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
9
|
+
from mindsdb.utilities.config import config
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
default_project = config.get('default_project')
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class SkillsController:
|
|
@@ -16,7 +20,7 @@ class SkillsController:
|
|
|
16
20
|
project_controller = ProjectController()
|
|
17
21
|
self.project_controller = project_controller
|
|
18
22
|
|
|
19
|
-
def get_skill(self, skill_name: str, project_name: str =
|
|
23
|
+
def get_skill(self, skill_name: str, project_name: str = default_project) -> Optional[db.Skills]:
|
|
20
24
|
'''
|
|
21
25
|
Gets a skill by name. Skills are expected to have unique names.
|
|
22
26
|
|
|
@@ -90,7 +94,7 @@ class SkillsController:
|
|
|
90
94
|
ValueError: If `project_name` does not exist or skill already exists
|
|
91
95
|
'''
|
|
92
96
|
if project_name is None:
|
|
93
|
-
project_name =
|
|
97
|
+
project_name = default_project
|
|
94
98
|
project = self.project_controller.get(name=project_name)
|
|
95
99
|
|
|
96
100
|
skill = self.get_skill(name, project_name)
|
|
@@ -113,7 +117,7 @@ class SkillsController:
|
|
|
113
117
|
self,
|
|
114
118
|
skill_name: str,
|
|
115
119
|
new_name: str = None,
|
|
116
|
-
project_name: str =
|
|
120
|
+
project_name: str = default_project,
|
|
117
121
|
type: str = None,
|
|
118
122
|
params: Dict[str, str] = None):
|
|
119
123
|
'''
|
|
@@ -158,7 +162,7 @@ class SkillsController:
|
|
|
158
162
|
|
|
159
163
|
return existing_skill
|
|
160
164
|
|
|
161
|
-
def delete_skill(self, skill_name: str, project_name: str =
|
|
165
|
+
def delete_skill(self, skill_name: str, project_name: str = default_project):
|
|
162
166
|
'''
|
|
163
167
|
Deletes a skill by name.
|
|
164
168
|
|
mindsdb/interfaces/storage/db.py
CHANGED
|
@@ -10,6 +10,7 @@ from sqlalchemy import (
|
|
|
10
10
|
DateTime,
|
|
11
11
|
Index,
|
|
12
12
|
Integer,
|
|
13
|
+
LargeBinary,
|
|
13
14
|
Numeric,
|
|
14
15
|
String,
|
|
15
16
|
UniqueConstraint,
|
|
@@ -213,6 +214,7 @@ class Project(Base):
|
|
|
213
214
|
deleted_at = Column(DateTime)
|
|
214
215
|
name = Column(String, nullable=False)
|
|
215
216
|
company_id = Column(Integer, default=0)
|
|
217
|
+
metadata_: dict = Column("metadata", JSON, nullable=True)
|
|
216
218
|
__table_args__ = (
|
|
217
219
|
UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
|
|
218
220
|
)
|
|
@@ -276,8 +278,20 @@ class JsonStorage(Base):
|
|
|
276
278
|
resource_id = Column(Integer)
|
|
277
279
|
name = Column(String)
|
|
278
280
|
content = Column(JSON)
|
|
281
|
+
encrypted_content = Column(LargeBinary, nullable=True)
|
|
279
282
|
company_id = Column(Integer)
|
|
280
283
|
|
|
284
|
+
def to_dict(self) -> Dict:
|
|
285
|
+
return {
|
|
286
|
+
"id": self.id,
|
|
287
|
+
"resource_group": self.resource_group,
|
|
288
|
+
"resource_id": self.resource_id,
|
|
289
|
+
"name": self.name,
|
|
290
|
+
"content": self.content,
|
|
291
|
+
"encrypted_content": self.encrypted_content,
|
|
292
|
+
"company_id": self.company_id,
|
|
293
|
+
}
|
|
294
|
+
|
|
281
295
|
|
|
282
296
|
class Jobs(Base):
|
|
283
297
|
__tablename__ = "jobs"
|