MindsDB 25.2.4.0__py3-none-any.whl → 25.3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (64)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +16 -1
  3. mindsdb/api/executor/command_executor.py +1 -1
  4. mindsdb/api/executor/datahub/datanodes/system_tables.py +6 -1
  5. mindsdb/api/executor/planner/query_planner.py +6 -2
  6. mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -1
  7. mindsdb/api/executor/sql_query/steps/union_step.py +21 -24
  8. mindsdb/api/http/gui.py +5 -4
  9. mindsdb/api/http/initialize.py +19 -19
  10. mindsdb/api/mongo/classes/query_sql.py +2 -1
  11. mindsdb/api/mongo/responders/aggregate.py +2 -2
  12. mindsdb/api/mongo/responders/coll_stats.py +3 -2
  13. mindsdb/api/mongo/responders/db_stats.py +2 -1
  14. mindsdb/api/mongo/responders/insert.py +4 -2
  15. mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +2 -1
  16. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +5 -4
  17. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +2 -4
  18. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  19. mindsdb/integrations/handlers/gmail_handler/connection_args.py +2 -2
  20. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +19 -66
  21. mindsdb/integrations/handlers/gmail_handler/requirements.txt +0 -1
  22. mindsdb/integrations/handlers/google_calendar_handler/connection_args.py +15 -0
  23. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +31 -41
  24. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +0 -2
  25. mindsdb/integrations/handlers/jira_handler/__init__.py +1 -0
  26. mindsdb/integrations/handlers/jira_handler/jira_handler.py +22 -80
  27. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +3 -3
  28. mindsdb/integrations/handlers/slack_handler/slack_handler.py +2 -1
  29. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +2 -38
  30. mindsdb/integrations/libs/api_handler_generator.py +583 -0
  31. mindsdb/integrations/libs/llm/utils.py +2 -1
  32. mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +29 -38
  33. mindsdb/integrations/utilities/pydantic_utils.py +208 -0
  34. mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -4
  35. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +800 -135
  36. mindsdb/integrations/utilities/rag/settings.py +390 -152
  37. mindsdb/integrations/utilities/sql_utils.py +2 -1
  38. mindsdb/interfaces/agents/agents_controller.py +11 -7
  39. mindsdb/interfaces/agents/mindsdb_chat_model.py +4 -2
  40. mindsdb/interfaces/chatbot/chatbot_controller.py +9 -8
  41. mindsdb/interfaces/database/database.py +2 -1
  42. mindsdb/interfaces/database/projects.py +28 -2
  43. mindsdb/interfaces/jobs/jobs_controller.py +4 -1
  44. mindsdb/interfaces/model/model_controller.py +5 -2
  45. mindsdb/interfaces/skills/retrieval_tool.py +128 -39
  46. mindsdb/interfaces/skills/skill_tool.py +7 -7
  47. mindsdb/interfaces/skills/skills_controller.py +8 -4
  48. mindsdb/interfaces/storage/db.py +14 -0
  49. mindsdb/interfaces/storage/json.py +59 -0
  50. mindsdb/interfaces/storage/model_fs.py +85 -3
  51. mindsdb/interfaces/triggers/triggers_controller.py +2 -1
  52. mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +17 -3
  53. mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +29 -0
  54. mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +41 -0
  55. mindsdb/utilities/config.py +6 -2
  56. mindsdb/utilities/functions.py +11 -0
  57. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/METADATA +219 -222
  58. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/RECORD +61 -60
  59. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/WHEEL +1 -1
  60. mindsdb/integrations/handlers/gmail_handler/utils.py +0 -45
  61. mindsdb/integrations/handlers/jira_handler/jira_table.py +0 -172
  62. mindsdb/integrations/handlers/jira_handler/requirements.txt +0 -1
  63. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/LICENSE +0 -0
  64. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from mindsdb_sql_parser import ast
7
7
  from mindsdb_sql_parser.ast.base import ASTNode
8
8
 
9
9
  from mindsdb.integrations.utilities.query_traversal import query_traversal
10
+ from mindsdb.utilities.config import config
10
11
 
11
12
 
12
13
  class FilterOperator(Enum):
@@ -74,7 +75,7 @@ def make_sql_session():
74
75
  from mindsdb.api.executor.controllers.session_controller import SessionController
75
76
 
76
77
  sql_session = SessionController()
77
- sql_session.database = 'mindsdb'
78
+ sql_session.database = config.get('default_project')
78
79
  return sql_session
79
80
 
80
81
 
@@ -13,12 +13,16 @@ from mindsdb.interfaces.database.projects import ProjectController
13
13
  from mindsdb.interfaces.model.functions import PredictorRecordNotFound
14
14
  from mindsdb.interfaces.model.model_controller import ModelController
15
15
  from mindsdb.interfaces.skills.skills_controller import SkillsController
16
+ from mindsdb.utilities.config import config
16
17
  from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
17
18
 
18
19
  from .constants import ASSISTANT_COLUMN, SUPPORTED_PROVIDERS, PROVIDER_TO_MODELS
19
20
  from .langchain_agent import get_llm_provider
20
21
 
21
22
 
23
+ default_project = config.get('default_project')
24
+
25
+
22
26
  class AgentsController:
23
27
  '''Handles CRUD operations at the database level for Agents'''
24
28
 
@@ -70,7 +74,7 @@ class AgentsController:
70
74
 
71
75
  return model, provider
72
76
 
73
- def get_agent(self, agent_name: str, project_name: str = 'mindsdb') -> Optional[db.Agents]:
77
+ def get_agent(self, agent_name: str, project_name: str = default_project) -> Optional[db.Agents]:
74
78
  '''
75
79
  Gets an agent by name.
76
80
 
@@ -91,7 +95,7 @@ class AgentsController:
91
95
  ).first()
92
96
  return agent
93
97
 
94
- def get_agent_by_id(self, id: int, project_name: str = 'mindsdb') -> db.Agents:
98
+ def get_agent_by_id(self, id: int, project_name: str = default_project) -> db.Agents:
95
99
  '''
96
100
  Gets an agent by id.
97
101
 
@@ -162,7 +166,7 @@ class AgentsController:
162
166
  ValueError: Agent with given name already exists, or skill/model with given name does not exist.
163
167
  '''
164
168
  if project_name is None:
165
- project_name = 'mindsdb'
169
+ project_name = default_project
166
170
  project = self.project_controller.get(name=project_name)
167
171
 
168
172
  agent = self.get_agent(name, project_name)
@@ -208,7 +212,7 @@ class AgentsController:
208
212
  def update_agent(
209
213
  self,
210
214
  agent_name: str,
211
- project_name: str = 'mindsdb',
215
+ project_name: str = default_project,
212
216
  name: str = None,
213
217
  model_name: str = None,
214
218
  skills_to_add: List[Union[str, dict]] = None,
@@ -347,7 +351,7 @@ class AgentsController:
347
351
 
348
352
  return existing_agent
349
353
 
350
- def delete_agent(self, agent_name: str, project_name: str = 'mindsdb'):
354
+ def delete_agent(self, agent_name: str, project_name: str = default_project):
351
355
  '''
352
356
  Deletes an agent by name.
353
357
 
@@ -371,7 +375,7 @@ class AgentsController:
371
375
  self,
372
376
  agent: db.Agents,
373
377
  messages: List[Dict[str, str]],
374
- project_name: str = 'mindsdb',
378
+ project_name: str = default_project,
375
379
  tools: List[BaseTool] = None,
376
380
  stream: bool = False) -> Union[Iterator[object], pd.DataFrame]:
377
381
  """
@@ -412,7 +416,7 @@ class AgentsController:
412
416
  self,
413
417
  agent: db.Agents,
414
418
  messages: List[Dict[str, str]],
415
- project_name: str = 'mindsdb',
419
+ project_name: str = default_project,
416
420
  tools: List[BaseTool] = None) -> Iterator[object]:
417
421
  '''
418
422
  Queries an agent to get a stream of completion chunks.
@@ -31,8 +31,10 @@ from langchain_core.outputs import (
31
31
  from pydantic import model_validator
32
32
 
33
33
  from mindsdb.interfaces.agents.constants import USER_COLUMN
34
+ from mindsdb.utilities.config import config
34
35
 
35
36
  logger = logging.getLogger(__name__)
37
+ default_project = config.get('default_project')
36
38
 
37
39
 
38
40
  def _convert_message_to_dict(message: BaseMessage) -> dict:
@@ -63,7 +65,7 @@ class ChatMindsdb(BaseChatModel):
63
65
  """A chat model that uses the Mindsdb"""
64
66
 
65
67
  model_name: str
66
- project_name: Optional[str] = 'mindsdb'
68
+ project_name: Optional[str] = default_project
67
69
  model_info: Optional[dict] = None
68
70
  project_datanode: Optional[Any] = None
69
71
 
@@ -139,7 +141,7 @@ class ChatMindsdb(BaseChatModel):
139
141
  from mindsdb.api.executor.controllers import SessionController
140
142
 
141
143
  session = SessionController()
142
- session.database = 'mindsdb'
144
+ session.database = default_project
143
145
 
144
146
  values['model_info'] = session.model_controller.get_model(model_name, project_name=project_name)
145
147
 
@@ -9,7 +9,10 @@ from mindsdb.interfaces.model.functions import get_project_records
9
9
  from mindsdb.utilities.context import context as ctx
10
10
 
11
11
  from mindsdb.api.executor.controllers.session_controller import SessionController
12
- from mindsdb.utilities.config import Config
12
+ from mindsdb.utilities.config import config
13
+
14
+
15
+ default_project = config.get('default_project')
13
16
 
14
17
 
15
18
  class ChatBotController:
@@ -25,7 +28,7 @@ class ChatBotController:
25
28
  self.project_controller = project_controller
26
29
  self.agents_controller = agents_controller
27
30
 
28
- def get_chatbot(self, chatbot_name: str, project_name: str = 'mindsdb') -> dict:
31
+ def get_chatbot(self, chatbot_name: str, project_name: str = default_project) -> dict:
29
32
  '''
30
33
  Gets a chatbot by name.
31
34
 
@@ -118,7 +121,7 @@ class ChatBotController:
118
121
 
119
122
  return bot_obj
120
123
 
121
- def get_chatbots(self, project_name: str = 'mindsdb') -> List[dict]:
124
+ def get_chatbots(self, project_name: str = default_project) -> List[dict]:
122
125
  '''
123
126
  Gets all chatbots in a project.
124
127
 
@@ -199,14 +202,12 @@ class ChatBotController:
199
202
  bot (db.ChatBots): The created chatbot
200
203
  '''
201
204
 
202
- config = Config()
203
-
204
205
  is_cloud = config.get('cloud', False)
205
206
  if is_cloud and ctx.user_class == 0:
206
207
  raise Exception("You can't create chatbot")
207
208
 
208
209
  if project_name is None:
209
- project_name = 'mindsdb'
210
+ project_name = default_project
210
211
  project = self.project_controller.get(name=project_name)
211
212
 
212
213
  bot = self.get_chatbot(name, project_name)
@@ -260,7 +261,7 @@ class ChatBotController:
260
261
  def update_chatbot(
261
262
  self,
262
263
  chatbot_name: str,
263
- project_name: str = 'mindsdb',
264
+ project_name: str = default_project,
264
265
  name: str = None,
265
266
  model_name: str = None,
266
267
  agent_name: str = None,
@@ -338,7 +339,7 @@ class ChatBotController:
338
339
 
339
340
  return existing_chatbot_rec
340
341
 
341
- def delete_chatbot(self, chatbot_name: str, project_name: str = 'mindsdb'):
342
+ def delete_chatbot(self, chatbot_name: str, project_name: str = default_project):
342
343
  '''
343
344
  Deletes a chatbot by name.
344
345
 
@@ -3,6 +3,7 @@ from collections import OrderedDict
3
3
 
4
4
  from mindsdb.interfaces.database.projects import ProjectController
5
5
  import mindsdb.utilities.profiler as profiler
6
+ from mindsdb.utilities.config import config
6
7
  from mindsdb.utilities.exception import EntityNotExistsError
7
8
  from mindsdb.interfaces.database.log import LogDBController
8
9
 
@@ -58,7 +59,7 @@ class DatabaseController:
58
59
  'id': x.id,
59
60
  'engine': None,
60
61
  'visible': True,
61
- 'deletable': x.name.lower() != 'mindsdb'
62
+ 'deletable': x.name.lower() != config.get('default_project')
62
63
  })
63
64
  for key, value in integrations.items():
64
65
  db_type = value.get('type', 'data')
@@ -4,6 +4,7 @@ from typing import List, Optional
4
4
  from collections import OrderedDict
5
5
 
6
6
  import sqlalchemy as sa
7
+ from sqlalchemy.orm.attributes import flag_modified
7
8
  import numpy as np
8
9
 
9
10
  from mindsdb_sql_parser.ast.base import ASTNode
@@ -30,6 +31,7 @@ class Project:
30
31
  p.name = db_record.name
31
32
  p.company_id = ctx.company_id
32
33
  p.id = db_record.id
34
+ p.metadata = db_record.metadata_
33
35
  return p
34
36
 
35
37
  def create(self, name: str):
@@ -405,9 +407,9 @@ class ProjectController:
405
407
 
406
408
  return [Project.from_record(x) for x in records]
407
409
 
408
- def get(self, id: Optional[int] = None, name: Optional[str] = None, deleted: bool = False) -> Project:
410
+ def get(self, id: Optional[int] = None, name: Optional[str] = None, deleted: bool = False, is_default: bool = False) -> Project:
409
411
  if id is not None and name is not None:
410
- raise ValueError("Both 'id' and 'name' is None")
412
+ raise ValueError("Both 'id' and 'name' can't be provided at the same time")
411
413
 
412
414
  company_id = ctx.company_id if ctx.company_id is not None else 0
413
415
  q = db.Project.query.filter_by(company_id=company_id)
@@ -424,6 +426,9 @@ class ProjectController:
424
426
  else:
425
427
  q = q.filter_by(deleted_at=sa.null())
426
428
 
429
+ if is_default:
430
+ q = q.filter(db.Project.metadata_['is_default'].as_boolean() == is_default)
431
+
427
432
  record = q.first()
428
433
 
429
434
  if record is None:
@@ -434,3 +439,24 @@ class ProjectController:
434
439
  project = Project()
435
440
  project.create(name=name)
436
441
  return project
442
+
443
+ def update(self, id: Optional[int] = None, name: Optional[str] = None, new_name: str = None, new_metadata: dict = None) -> Project:
444
+ if id is not None and name is not None:
445
+ raise ValueError("Both 'id' and 'name' can't be provided at the same time")
446
+
447
+ if id is not None:
448
+ project = self.get(id=id)
449
+ else:
450
+ project = self.get(name=name)
451
+
452
+ if new_name is not None:
453
+ project.name = new_name
454
+ project.record.name = new_name
455
+
456
+ if new_metadata is not None:
457
+ project.metadata = new_metadata
458
+ project.record.metadata = new_metadata
459
+ flag_modified(project.record, 'metadata_')
460
+
461
+ db.session.commit()
462
+ return project
@@ -9,6 +9,7 @@ from mindsdb_sql_parser import parse_sql, ParsingException
9
9
  from mindsdb_sql_parser.ast.mindsdb import CreateJob
10
10
  from mindsdb_sql_parser.ast import Select, Star, Identifier, BinaryOperation, Constant
11
11
 
12
+ from mindsdb.utilities.config import config
12
13
  from mindsdb.utilities.context import context as ctx
13
14
  from mindsdb.utilities.exception import EntityNotExistsError, EntityExistsError
14
15
  from mindsdb.interfaces.storage import db
@@ -20,6 +21,8 @@ from mindsdb.utilities import log
20
21
 
21
22
  logger = log.getLogger(__name__)
22
23
 
24
+ default_project = config.get('default_project')
25
+
23
26
 
24
27
  def split_sql(sql):
25
28
  # split sql by ';' ignoring delimiter in quotes
@@ -199,7 +202,7 @@ class JobsController:
199
202
  """
200
203
 
201
204
  if project_name is None:
202
- project_name = 'mindsdb'
205
+ project_name = default_project
203
206
 
204
207
  start_at = None
205
208
  if query.start_str is not None:
@@ -19,6 +19,7 @@ from mindsdb.interfaces.model.functions import (
19
19
  )
20
20
  from mindsdb.interfaces.storage.json import get_json_storage
21
21
  from mindsdb.interfaces.storage.model_fs import ModelStorage
22
+ from mindsdb.utilities.config import config
22
23
  from mindsdb.utilities.context import context as ctx
23
24
  from mindsdb.utilities.functions import resolve_model_identifier
24
25
  import mindsdb.utilities.profiler as profiler
@@ -29,6 +30,8 @@ logger = log.getLogger(__name__)
29
30
 
30
31
  IS_PY36 = sys.version_info[1] <= 6
31
32
 
33
+ default_project = config.get('default_project')
34
+
32
35
 
33
36
  def delete_model_storage(model_id, ctx_dump):
34
37
  try:
@@ -149,7 +152,7 @@ class ModelController():
149
152
  models.append(model_data)
150
153
  return models
151
154
 
152
- def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
155
+ def delete_model(self, model_name: str, project_name: str = default_project, version=None):
153
156
  from mindsdb.interfaces.database.database import DatabaseController
154
157
 
155
158
  project_record = get_project_record(func.lower(project_name))
@@ -344,7 +347,7 @@ class ModelController():
344
347
  def prepare_finetune_statement(self, statement, database_controller):
345
348
  project_name, model_name, model_version = resolve_model_identifier(statement.name)
346
349
  if project_name is None:
347
- project_name = 'mindsdb'
350
+ project_name = default_project
348
351
  data_integration_ref, fetch_data_query = self._get_data_integration_ref(statement, database_controller)
349
352
 
350
353
  set_active = True
@@ -1,33 +1,24 @@
1
+ import traceback
2
+
1
3
  from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
2
4
  from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
5
+ from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
6
+ from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
3
7
 
4
8
  from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
5
9
  from mindsdb.interfaces.skills.skill_tool import skill_tool
6
10
  from mindsdb.interfaces.storage import db
7
11
  from mindsdb.interfaces.storage.db import KnowledgeBase
8
12
  from mindsdb.utilities import log
13
+ from langchain_core.documents import Document
9
14
  from langchain_core.tools import Tool
15
+ from mindsdb.integrations.libs.response import RESPONSE_TYPE
10
16
  from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
11
17
 
12
18
  logger = log.getLogger(__name__)
13
19
 
14
20
 
15
- def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
16
- """
17
- Builds a retrieval tool i.e RAG
18
-
19
- Args:
20
- tool: Tool configuration dictionary
21
- pred_args: Predictor arguments dictionary
22
- skill: Skills database object
23
-
24
- Returns:
25
- Tool: Configured retrieval tool
26
-
27
- Raises:
28
- ValueError: If knowledge base is not found or configuration is invalid
29
- """
30
- # build RAG config
21
+ def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
31
22
  tools_config = tool['config']
32
23
  tools_config.update(pred_args)
33
24
 
@@ -71,34 +62,132 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
71
62
  logger.debug("Using default embedding model as no knowledge base provided")
72
63
 
73
64
  # Load and validate config
74
- try:
75
- rag_config = load_rag_config(tools_config, kb_params, embeddings_model)
76
- # build retriever
77
- rag_pipeline = RAG(rag_config)
78
- logger.debug(f"RAG pipeline created with config: {rag_config}")
79
-
80
- def rag_wrapper(query: str) -> str:
81
- try:
82
- result = rag_pipeline(query)
83
- logger.debug(f"RAG pipeline result: {result}")
84
- return result['answer']
85
- except Exception as e:
86
- logger.error(f"Error in RAG pipeline: {str(e)}")
87
- return f"Error in retrieval: {str(e)}"
88
-
89
- # Create RAG tool
90
- return Tool(
91
- func=rag_wrapper,
92
- name=tool['name'],
93
- description=tool['description'],
94
- response_format='content',
95
- # Return directly by default since we already use an LLM against retrieved context to generate a response.
96
- return_direct=tools_config.get('return_direct', True)
65
+ return load_rag_config(tools_config, kb_params, embeddings_model)
66
+
67
+
68
+ def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
69
+ rag_config = _load_rag_config(tool, pred_args, skill)
70
+ # build retriever
71
+ rag_pipeline = RAG(rag_config)
72
+ logger.debug(f"RAG pipeline created with config: {rag_config}")
73
+
74
+ def rag_wrapper(query: str) -> str:
75
+ try:
76
+ result = rag_pipeline(query)
77
+ logger.debug(f"RAG pipeline result: {result}")
78
+ return result['answer']
79
+ except Exception as e:
80
+ logger.error(f"Error in RAG pipeline: {str(e)}")
81
+ logger.error(traceback.format_exc())
82
+ return f"Error in retrieval: {str(e)}"
83
+
84
+ # Create RAG tool
85
+ tools_config = tool['config']
86
+ tools_config.update(pred_args)
87
+ return Tool(
88
+ func=rag_wrapper,
89
+ name=tool['name'],
90
+ description=tool['description'],
91
+ response_format='content',
92
+ # Return directly by default since we already use an LLM against retrieved context to generate a response.
93
+ return_direct=tools_config.get('return_direct', True)
94
+ )
95
+
96
+
97
+ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
98
+ if 'source' not in tool:
99
+ raise ValueError("Knowledge base for tool not found")
100
+ kb_name = tool['source']
101
+ executor = skill_tool.get_command_executor()
102
+ kb = _get_knowledge_base(kb_name, skill.project_id, executor)
103
+ if not kb:
104
+ raise ValueError(f"Knowledge base not found: {kb_name}")
105
+ kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
106
+ vector_db_handler = kb_table.get_vector_db()
107
+
108
+ rag_config = _load_rag_config(tool, pred_args, skill)
109
+ metadata_config = rag_config.metadata_config
110
+
111
+ def _get_document_by_name(name: str):
112
+ if metadata_config.name_column_index is not None:
113
+ tsquery_str = ' & '.join(name.split(' '))
114
+ documents_response = vector_db_handler.native_query(
115
+ f'SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery(\'{tsquery_str}\') LIMIT 1;'
116
+ )
117
+ else:
118
+ documents_response = vector_db_handler.native_query(
119
+ f'SELECT * FROM {metadata_config.table} WHERE "{metadata_config.name_column}" ILIKE \'%{name}%\' LIMIT 1;'
120
+ )
121
+ if documents_response.resp_type == RESPONSE_TYPE.ERROR:
122
+ raise RuntimeError(f'There was an error looking up documents: {documents_response.error_message}')
123
+ if documents_response.data_frame.empty:
124
+ return None
125
+ document_row = documents_response.data_frame.head(1)
126
+ # Restore document from chunks, keeping in mind max context.
127
+ id_filter_condition = FilterCondition(
128
+ f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
129
+ FilterOperator.EQUAL,
130
+ str(document_row.get(metadata_config.id_column).item())
131
+ )
132
+ document_chunks_df = vector_db_handler.select(
133
+ metadata_config.embeddings_table,
134
+ conditions=[id_filter_condition]
97
135
  )
136
+ if document_chunks_df.empty:
137
+ return None
138
+ sort_col = 'chunk_id' if 'chunk_id' in document_chunks_df.columns else 'id'
139
+ document_chunks_df.sort_values(by=sort_col)
140
+ content = ''
141
+ for _, chunk in document_chunks_df.iterrows():
142
+ if len(content) > metadata_config.max_document_context:
143
+ break
144
+ content += chunk.get(metadata_config.content_column, '')
145
+
146
+ return Document(
147
+ page_content=content,
148
+ metadata=document_row.to_dict(orient='records')[0]
149
+ )
150
+
151
+ def _lookup_document_by_name(name: str):
152
+ found_document = _get_document_by_name(name)
153
+ if found_document is None:
154
+ return f'I could not find any document with name {name}. Please make sure the document name matches exactly.'
155
+ return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"
156
+
157
+ return Tool(
158
+ func=_lookup_document_by_name,
159
+ name=tool.get('name', '') + '_name_lookup',
160
+ description='You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.',
161
+ return_direct=False
162
+ )
98
163
 
164
+
165
+ def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills):
166
+ """
167
+ Builds a list of tools for retrieval i.e RAG
168
+
169
+ Args:
170
+ tool: Tool configuration dictionary
171
+ pred_args: Predictor arguments dictionary
172
+ skill: Skills database object
173
+
174
+ Returns:
175
+ Tool: Configured list of retrieval tools
176
+
177
+ Raises:
178
+ ValueError: If knowledge base is not found or configuration is invalid
179
+ """
180
+ # Catch configuration errors before creating tools.
181
+ try:
182
+ rag_config = _load_rag_config(tool, pred_args, skill)
99
183
  except Exception as e:
100
184
  logger.error(f"Error building RAG pipeline: {str(e)}")
101
185
  raise ValueError(f"Failed to build RAG pipeline: {str(e)}")
186
+ tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
187
+ if rag_config.metadata_config is None:
188
+ return tools
189
+ tools.append(_build_name_lookup_tool(tool, pred_args, skill))
190
+ return tools
102
191
 
103
192
 
104
193
  def _get_knowledge_base(knowledge_base_name: str, project_id, executor) -> KnowledgeBase:
@@ -10,6 +10,7 @@ from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant
10
10
 
11
11
  from mindsdb.utilities import log
12
12
  from mindsdb.utilities.cache import get_cache
13
+ from mindsdb.utilities.config import config
13
14
  from mindsdb.interfaces.storage import db
14
15
  from mindsdb.interfaces.skills.sql_agent import SQLAgent
15
16
  from mindsdb.integrations.libs.vectordatabase_handler import TableField
@@ -106,7 +107,7 @@ class SkillToolController:
106
107
  from mindsdb.api.executor.controllers import SessionController # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304) # noqa
107
108
 
108
109
  sql_session = SessionController()
109
- sql_session.database = 'mindsdb'
110
+ sql_session.database = config.get('default_project')
110
111
 
111
112
  self.command_executor = ExecuteCommands(sql_session)
112
113
  return self.command_executor
@@ -222,8 +223,8 @@ class SkillToolController:
222
223
  pred_args = {}
223
224
  pred_args['llm'] = llm
224
225
 
225
- from .retrieval_tool import build_retrieval_tool
226
- return build_retrieval_tool(tool, pred_args, skill)
226
+ from .retrieval_tool import build_retrieval_tools
227
+ return build_retrieval_tools(tool, pred_args, skill)
227
228
 
228
229
  def _get_rag_query_function(self, skill: db.Skills):
229
230
  session_controller = self.get_command_executor().session
@@ -295,10 +296,9 @@ class SkillToolController:
295
296
  for skill in skills
296
297
  ]
297
298
  elif skill_type == SkillType.RETRIEVAL:
298
- tools[skill_type] = [
299
- self._make_retrieval_tools(skill, llm, embedding_model)
300
- for skill in skills
301
- ]
299
+ tools[skill_type] = []
300
+ for skill in skills:
301
+ tools[skill_type] += self._make_retrieval_tools(skill, llm, embedding_model)
302
302
  return tools
303
303
 
304
304
 
@@ -6,6 +6,10 @@ from sqlalchemy.orm.attributes import flag_modified
6
6
 
7
7
  from mindsdb.interfaces.storage import db
8
8
  from mindsdb.interfaces.database.projects import ProjectController
9
+ from mindsdb.utilities.config import config
10
+
11
+
12
+ default_project = config.get('default_project')
9
13
 
10
14
 
11
15
  class SkillsController:
@@ -16,7 +20,7 @@ class SkillsController:
16
20
  project_controller = ProjectController()
17
21
  self.project_controller = project_controller
18
22
 
19
- def get_skill(self, skill_name: str, project_name: str = 'mindsdb') -> Optional[db.Skills]:
23
+ def get_skill(self, skill_name: str, project_name: str = default_project) -> Optional[db.Skills]:
20
24
  '''
21
25
  Gets a skill by name. Skills are expected to have unique names.
22
26
 
@@ -90,7 +94,7 @@ class SkillsController:
90
94
  ValueError: If `project_name` does not exist or skill already exists
91
95
  '''
92
96
  if project_name is None:
93
- project_name = 'mindsdb'
97
+ project_name = default_project
94
98
  project = self.project_controller.get(name=project_name)
95
99
 
96
100
  skill = self.get_skill(name, project_name)
@@ -113,7 +117,7 @@ class SkillsController:
113
117
  self,
114
118
  skill_name: str,
115
119
  new_name: str = None,
116
- project_name: str = 'mindsdb',
120
+ project_name: str = default_project,
117
121
  type: str = None,
118
122
  params: Dict[str, str] = None):
119
123
  '''
@@ -158,7 +162,7 @@ class SkillsController:
158
162
 
159
163
  return existing_skill
160
164
 
161
- def delete_skill(self, skill_name: str, project_name: str = 'mindsdb'):
165
+ def delete_skill(self, skill_name: str, project_name: str = default_project):
162
166
  '''
163
167
  Deletes a skill by name.
164
168
 
@@ -10,6 +10,7 @@ from sqlalchemy import (
10
10
  DateTime,
11
11
  Index,
12
12
  Integer,
13
+ LargeBinary,
13
14
  Numeric,
14
15
  String,
15
16
  UniqueConstraint,
@@ -213,6 +214,7 @@ class Project(Base):
213
214
  deleted_at = Column(DateTime)
214
215
  name = Column(String, nullable=False)
215
216
  company_id = Column(Integer, default=0)
217
+ metadata_: dict = Column("metadata", JSON, nullable=True)
216
218
  __table_args__ = (
217
219
  UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
218
220
  )
@@ -276,8 +278,20 @@ class JsonStorage(Base):
276
278
  resource_id = Column(Integer)
277
279
  name = Column(String)
278
280
  content = Column(JSON)
281
+ encrypted_content = Column(LargeBinary, nullable=True)
279
282
  company_id = Column(Integer)
280
283
 
284
+ def to_dict(self) -> Dict:
285
+ return {
286
+ "id": self.id,
287
+ "resource_group": self.resource_group,
288
+ "resource_id": self.resource_id,
289
+ "name": self.name,
290
+ "content": self.content,
291
+ "encrypted_content": self.encrypted_content,
292
+ "company_id": self.company_id,
293
+ }
294
+
281
295
 
282
296
  class Jobs(Base):
283
297
  __tablename__ = "jobs"