MindsDB-25.1.2.1-py3-none-any.whl → MindsDB-25.1.4.0-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions exactly as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. More details are available via the link on the original page.

Files changed (77):
  1. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
  2. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +5 -3
  5. mindsdb/api/executor/__init__.py +0 -1
  6. mindsdb/api/executor/command_executor.py +2 -1
  7. mindsdb/api/executor/data_types/answer.py +1 -1
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
  9. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
  10. mindsdb/api/executor/sql_query/__init__.py +1 -0
  11. mindsdb/api/executor/sql_query/result_set.py +36 -21
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
  13. mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
  14. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  15. mindsdb/api/executor/utilities/sql.py +2 -10
  16. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  17. mindsdb/api/http/namespaces/sql.py +3 -1
  18. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  19. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
  20. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  21. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  22. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
  24. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  25. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  26. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  27. mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
  28. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
  29. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
  31. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  32. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  33. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
  34. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  35. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  36. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  37. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
  38. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
  39. mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
  40. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  41. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
  42. mindsdb/integrations/utilities/rag/settings.py +8 -2
  43. mindsdb/integrations/utilities/sql_utils.py +1 -1
  44. mindsdb/interfaces/agents/agents_controller.py +3 -5
  45. mindsdb/interfaces/agents/langchain_agent.py +112 -150
  46. mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
  47. mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
  48. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  49. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  50. mindsdb/interfaces/chatbot/memory.py +58 -13
  51. mindsdb/interfaces/database/projects.py +17 -15
  52. mindsdb/interfaces/database/views.py +12 -25
  53. mindsdb/interfaces/knowledge_base/controller.py +39 -15
  54. mindsdb/interfaces/model/functions.py +15 -4
  55. mindsdb/interfaces/model/model_controller.py +4 -7
  56. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
  57. mindsdb/interfaces/skills/retrieval_tool.py +10 -3
  58. mindsdb/interfaces/skills/skill_tool.py +97 -53
  59. mindsdb/interfaces/skills/sql_agent.py +77 -36
  60. mindsdb/interfaces/storage/db.py +1 -1
  61. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  62. mindsdb/utilities/cache.py +7 -4
  63. mindsdb/utilities/context.py +11 -1
  64. mindsdb/utilities/langfuse.py +264 -0
  65. mindsdb/utilities/log.py +20 -2
  66. mindsdb/utilities/otel/__init__.py +206 -0
  67. mindsdb/utilities/otel/logger.py +25 -0
  68. mindsdb/utilities/otel/meter.py +19 -0
  69. mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
  70. mindsdb/utilities/otel/tracer.py +16 -0
  71. mindsdb/utilities/partitioning.py +52 -0
  72. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  73. mindsdb/utilities/utils.py +34 -0
  74. mindsdb/utilities/otel.py +0 -72
  75. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
  76. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
  77. {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
@@ -53,15 +53,23 @@ class ChatBotTask(BaseTask):
53
53
 
54
54
  chat_params = self.chat_handler.get_chat_config()
55
55
  polling = chat_params['polling']['type']
56
+
57
+ memory = chat_params['memory']['type'] if 'memory' in chat_params else None
58
+ memory_cls = None
59
+ if memory:
60
+ memory_cls = DBMemory if memory == 'db' else HandlerMemory
61
+
56
62
  if polling == 'message_count':
57
63
  chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
58
64
  self.chat_pooling = MessageCountPolling(self, chat_params)
59
- self.memory = HandlerMemory(self, chat_params)
65
+ # The default type for message count polling is HandlerMemory if not specified.
66
+ self.memory = HandlerMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
60
67
 
61
68
  elif polling == 'realtime':
62
69
  chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
63
70
  self.chat_pooling = RealtimePolling(self, chat_params)
64
- self.memory = DBMemory(self, chat_params)
71
+ # The default type for real-time polling is DBMemory if not specified.
72
+ self.memory = DBMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
65
73
 
66
74
  elif polling == 'webhook':
67
75
  self.chat_pooling = WebhookPolling(self, chat_params)
@@ -80,11 +88,11 @@ class ChatBotTask(BaseTask):
80
88
  self.chat_pooling.run(stop_event)
81
89
 
82
90
  def on_message(self, message: ChatBotMessage, chat_id=None, chat_memory=None, table_name=None):
83
- if not chat_id and chat_memory:
91
+ if not chat_id and not chat_memory:
84
92
  raise Exception('chat_id or chat_memory should be provided')
85
93
 
86
94
  try:
87
- self._on_holding_message(chat_id, table_name)
95
+ self._on_holding_message(chat_id, chat_memory, table_name)
88
96
  self._on_message(message, chat_id, chat_memory, table_name)
89
97
  except (SystemExit, KeyboardInterrupt):
90
98
  raise
@@ -93,15 +101,18 @@ class ChatBotTask(BaseTask):
93
101
  logger.error(error)
94
102
  self.set_error(str(error))
95
103
 
96
- def _on_holding_message(self, chat_id: str, table_name: str = None):
104
+ def _on_holding_message(self, chat_id: str = None, chat_memory: BaseMemory = None, table_name: str = None):
97
105
  """
98
106
  Send a message to hold the user's attention while the bot is processing the request.
99
107
  This message will not be saved in the chat memory.
100
108
 
101
109
  Args:
102
110
  chat_id (str): The ID of the chat.
111
+ chat_memory (BaseMemory): The memory of the chat.
103
112
  table_name (str): The name of the table.
104
113
  """
114
+ chat_id = chat_id if chat_id else chat_memory.chat_id
115
+
105
116
  response_message = ChatBotMessage(
106
117
  ChatBotMessage.Type.DIRECT,
107
118
  HOLDING_MESSAGE,
@@ -1,9 +1,9 @@
1
1
 
2
+ from typing import Union
3
+
2
4
  from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, OrderBy
3
5
 
4
6
  from mindsdb.interfaces.storage import db
5
-
6
-
7
7
  from .types import ChatBotMessage
8
8
 
9
9
 
@@ -60,7 +60,7 @@ class BaseMemory:
60
60
 
61
61
  # If the chat_id is a tuple, convert it to a string when storing the message in the database.
62
62
  self._add_to_history(
63
- str(chat_id) if isinstance(chat_id, tuple) else chat_id,
63
+ chat_id,
64
64
  chat_message,
65
65
  table_name=table_name
66
66
  )
@@ -74,7 +74,7 @@ class BaseMemory:
74
74
 
75
75
  else:
76
76
  history = self._get_chat_history(
77
- str(chat_id) if isinstance(chat_id, tuple) else chat_id,
77
+ chat_id,
78
78
  table_name
79
79
  )
80
80
  self._cache[key] = history
@@ -108,18 +108,44 @@ class HandlerMemory(BaseMemory):
108
108
  time_col = t_params['time_col']
109
109
  chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']]
110
110
 
111
- ast_query = Select(
112
- targets=[Identifier(text_col),
113
- Identifier(username_col),
114
- Identifier(time_col)],
115
- from_table=Identifier(t_params['name']),
116
- where=[BinaryOperation(
111
+ chat_id = chat_id if isinstance(chat_id, tuple) else (chat_id,)
112
+ # Add a WHERE clause for each chat_id column.
113
+ where_conditions = [
114
+ BinaryOperation(
117
115
  op='=',
118
116
  args=[
119
117
  Identifier(chat_id_col),
120
118
  Constant(chat_id[idx])
121
119
  ]
122
- ) for idx, chat_id_col in enumerate(chat_id_cols)],
120
+ ) for idx, chat_id_col in enumerate(chat_id_cols)
121
+ ]
122
+ # Add a WHERE clause to ignore holding messages from the bot.
123
+ from .chatbot_task import HOLDING_MESSAGE
124
+
125
+ where_conditions.append(
126
+ BinaryOperation(
127
+ op='!=',
128
+ args=[
129
+ Identifier(text_col),
130
+ Constant(HOLDING_MESSAGE)
131
+ ]
132
+ )
133
+ )
134
+
135
+ # Convert the WHERE conditions to a BinaryOperation object.
136
+ where_conditions_binary_operation = None
137
+ for condition in where_conditions:
138
+ if where_conditions_binary_operation is None:
139
+ where_conditions_binary_operation = condition
140
+ else:
141
+ where_conditions_binary_operation = BinaryOperation('and', args=[where_conditions_binary_operation, condition])
142
+
143
+ ast_query = Select(
144
+ targets=[Identifier(text_col),
145
+ Identifier(username_col),
146
+ Identifier(time_col)],
147
+ from_table=Identifier(t_params['name']),
148
+ where=where_conditions_binary_operation,
123
149
  order_by=[OrderBy(Identifier(time_col))],
124
150
  limit=Constant(self.MAX_DEPTH),
125
151
  )
@@ -151,9 +177,28 @@ class DBMemory(BaseMemory):
151
177
  uses mindsdb database to store messages
152
178
  '''
153
179
 
180
+ def _generate_chat_id_for_db(self, chat_id: Union[str, tuple], table_name: str = None) -> str:
181
+ """
182
+ Generate an ID for the chat to store in the database.
183
+ The ID is a string that includes the components of the chat ID and the table name (if provided) separated by underscores.
184
+
185
+ Args:
186
+ chat_id (str | tuple): The ID of the chat.
187
+ table_name (str): The name of the table the chat belongs to.
188
+ """
189
+ if isinstance(chat_id, tuple):
190
+ char_id_str = "_".join(str(val) for val in chat_id)
191
+ else:
192
+ char_id_str = str(chat_id)
193
+
194
+ if table_name:
195
+ chat_id_str = f"{table_name}_{char_id_str}"
196
+
197
+ return chat_id_str
198
+
154
199
  def _add_to_history(self, chat_id, message, table_name=None):
155
200
  chat_bot_id = self.chat_task.bot_id
156
- destination = str((chat_id, table_name)) if table_name else chat_id
201
+ destination = self._generate_chat_id_for_db(chat_id, table_name)
157
202
 
158
203
  message = db.ChatBotsHistory(
159
204
  chat_bot_id=chat_bot_id,
@@ -167,7 +212,7 @@ class DBMemory(BaseMemory):
167
212
 
168
213
  def _get_chat_history(self, chat_id, table_name=None):
169
214
  chat_bot_id = self.chat_task.bot_id
170
- destination = str((chat_id, table_name)) if table_name else chat_id
215
+ destination = self._generate_chat_id_for_db(chat_id, table_name)
171
216
 
172
217
  query = db.ChatBotsHistory.query\
173
218
  .filter(
@@ -24,19 +24,14 @@ class Project:
24
24
  p = Project()
25
25
  p.record = db_record
26
26
  p.name = db_record.name
27
- p.company_id = db_record.company_id
27
+ p.company_id = ctx.company_id
28
28
  p.id = db_record.id
29
29
  return p
30
30
 
31
31
  def create(self, name: str):
32
32
  name = name.lower()
33
- existing_record = db.Project.query.filter(
34
- (sa.func.lower(db.Project.name) == name)
35
- & (db.Project.company_id == ctx.company_id)
36
- & (db.Project.deleted_at == sa.null())
37
- ).first()
38
- if existing_record is not None:
39
- raise EntityExistsError('Project already exists', name)
33
+
34
+ company_id = ctx.company_id if ctx.company_id is not None else 0
40
35
 
41
36
  existing_record = db.Integration.query.filter(
42
37
  sa.func.lower(db.Integration.name) == name,
@@ -45,23 +40,28 @@ class Project:
45
40
  if existing_record is not None:
46
41
  raise EntityExistsError('Database exists with this name ', name)
47
42
 
43
+ existing_record = db.Project.query.filter(
44
+ (sa.func.lower(db.Project.name) == name)
45
+ & (db.Project.company_id == company_id)
46
+ & (db.Project.deleted_at == sa.null())
47
+ ).first()
48
+ if existing_record is not None:
49
+ raise EntityExistsError('Project already exists', name)
50
+
48
51
  record = db.Project(
49
52
  name=name,
50
- company_id=ctx.company_id
53
+ company_id=company_id
51
54
  )
52
55
 
53
56
  self.record = record
54
57
  self.name = name
55
- self.company_id = ctx.company_id
58
+ self.company_id = company_id
56
59
 
57
60
  db.session.add(record)
58
61
  db.session.commit()
59
62
 
60
63
  self.id = record.id
61
64
 
62
- def save(self):
63
- db.session.commit()
64
-
65
65
  def delete(self):
66
66
  tables = self.get_tables()
67
67
  tables = [key for key, val in tables.items() if val['type'] != 'table']
@@ -360,8 +360,9 @@ class ProjectController:
360
360
  pass
361
361
 
362
362
  def get_list(self) -> List[Project]:
363
+ company_id = ctx.company_id if ctx.company_id is not None else 0
363
364
  records = db.Project.query.filter(
364
- (db.Project.company_id == ctx.company_id)
365
+ (db.Project.company_id == company_id)
365
366
  & (db.Project.deleted_at == sa.null())
366
367
  ).order_by(db.Project.name)
367
368
 
@@ -371,7 +372,8 @@ class ProjectController:
371
372
  if id is not None and name is not None:
372
373
  raise ValueError("Both 'id' and 'name' is None")
373
374
 
374
- q = db.Project.query.filter_by(company_id=ctx.company_id)
375
+ company_id = ctx.company_id if ctx.company_id is not None else 0
376
+ q = db.Project.query.filter_by(company_id=company_id)
375
377
 
376
378
  if id is not None:
377
379
  q = q.filter_by(id=id)
@@ -3,6 +3,7 @@ from mindsdb.interfaces.storage import db
3
3
  from mindsdb.interfaces.query_context.context_controller import query_context_controller
4
4
  from mindsdb.utilities.context import context as ctx
5
5
  from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
6
+ from mindsdb.interfaces.model.functions import get_project_record, get_project_records
6
7
 
7
8
 
8
9
  class ViewController:
@@ -39,11 +40,8 @@ class ViewController:
39
40
 
40
41
  def update(self, name, query, project_name):
41
42
  name = name.lower()
42
- project_record = db.session.query(db.Project).filter_by(
43
- name=project_name,
44
- company_id=ctx.company_id,
45
- deleted_at=None
46
- ).first()
43
+ project_record = get_project_record(project_name)
44
+
47
45
  rec = db.session.query(db.View).filter(
48
46
  func.lower(db.View.name) == name,
49
47
  db.View.company_id == ctx.company_id,
@@ -56,11 +54,8 @@ class ViewController:
56
54
 
57
55
  def delete(self, name, project_name):
58
56
  name = name.lower()
59
- project_record = db.session.query(db.Project).filter_by(
60
- name=project_name,
61
- company_id=ctx.company_id,
62
- deleted_at=None
63
- ).first()
57
+ project_record = get_project_record(project_name)
58
+
64
59
  rec = db.session.query(db.View).filter(
65
60
  func.lower(db.View.name) == name,
66
61
  db.View.company_id == ctx.company_id,
@@ -74,17 +69,12 @@ class ViewController:
74
69
  query_context_controller.drop_query_context('view', rec.id)
75
70
 
76
71
  def list(self, project_name):
77
- query = db.session.query(db.Project).filter_by(
78
- company_id=ctx.company_id,
79
- deleted_at=None
80
- )
81
- if project_name is not None:
82
- query = query.filter_by(name=project_name)
83
72
 
84
- project_names = {
85
- i.id: i.name
86
- for i in query
87
- }
73
+ project_names = {}
74
+ for project in get_project_records():
75
+ if project_name is not None and project.name != project_name:
76
+ continue
77
+ project_names[project.id] = project.name
88
78
 
89
79
  query = db.session.query(db.View).filter(
90
80
  db.View.company_id == ctx.company_id,
@@ -112,11 +102,8 @@ class ViewController:
112
102
  }
113
103
 
114
104
  def get(self, id=None, name=None, project_name=None):
115
- project_record = db.session.query(db.Project).filter_by(
116
- name=project_name,
117
- company_id=ctx.company_id,
118
- deleted_at=None
119
- ).first()
105
+ project_record = get_project_record(project_name)
106
+
120
107
  if id is not None:
121
108
  records = db.session.query(db.View).filter_by(
122
109
  id=id,
@@ -52,6 +52,7 @@ class KnowledgeBaseTable:
52
52
  self.session = session
53
53
  self.document_preprocessor = None
54
54
  self.document_loader = None
55
+ self.model_params = None
55
56
 
56
57
  def configure_preprocessing(self, config: Optional[dict] = None):
57
58
  """Configure preprocessing for the knowledge base table"""
@@ -488,6 +489,7 @@ class KnowledgeBaseTable:
488
489
  df_out = project_datanode.predict(
489
490
  model_name=model_rec.name,
490
491
  df=df,
492
+ params=self.model_params
491
493
  )
492
494
 
493
495
  target = model_rec.to_predict[0]
@@ -642,11 +644,13 @@ class KnowledgeBaseController:
642
644
  storage: Identifier,
643
645
  params: dict,
644
646
  preprocessing_config: Optional[dict] = None,
645
- if_not_exists: bool = False,
647
+ if_not_exists: bool = False
646
648
  ) -> db.KnowledgeBase:
647
649
  """
648
650
  Add a new knowledge base to the database
649
651
  :param preprocessing_config: Optional preprocessing configuration to validate and store
652
+ :param is_sparse: Whether to use sparse vectors for embeddings
653
+ :param vector_size: Optional size specification for vectors, required when is_sparse=True
650
654
  """
651
655
  # Validate preprocessing config first if provided
652
656
  if preprocessing_config is not None:
@@ -654,6 +658,12 @@ class KnowledgeBaseController:
654
658
  params = params or {}
655
659
  params['preprocessing'] = preprocessing_config
656
660
 
661
+ # Check if vector_size is provided when using sparse vectors
662
+ is_sparse = params.get('is_sparse')
663
+ vector_size = params.get('vector_size')
664
+ if is_sparse and vector_size is None:
665
+ raise ValueError("vector_size is required when is_sparse=True")
666
+
657
667
  # get project id
658
668
  project = self.session.database_controller.get_project(project_name)
659
669
  project_id = project.id
@@ -693,7 +703,20 @@ class KnowledgeBaseController:
693
703
  cloud_pg_vector = os.environ.get('KB_PGVECTOR_URL')
694
704
  if cloud_pg_vector:
695
705
  vector_table_name = name
696
- vector_db_name = self._create_persistent_pgvector()
706
+ # Add sparse vector support for pgvector
707
+ vector_db_params = {}
708
+ # Check both explicit parameter and model configuration
709
+ is_sparse = is_sparse or model_record.learn_args.get('using', {}).get('sparse')
710
+ if is_sparse:
711
+ vector_db_params['is_sparse'] = True
712
+ if vector_size is not None:
713
+ vector_db_params['vector_size'] = vector_size
714
+ vector_db_name = self._create_persistent_pgvector(vector_db_params)
715
+
716
+ # create table in vectordb before creating KB
717
+ self.session.datahub.get(vector_db_name).integration_handler.create_table(
718
+ vector_table_name
719
+ )
697
720
  else:
698
721
  # create chroma db with same name
699
722
  vector_table_name = "default_collection"
@@ -707,15 +730,14 @@ class KnowledgeBaseController:
707
730
 
708
731
  vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
709
732
 
710
- # create table in vectordb
711
- if model_record.learn_args.get('using', {}).get('sparse') is not None:
712
- self.session.datahub.get(vector_db_name).integration_handler.create_table(
713
- vector_table_name, sparse=model_record.learn_args.get('using', {}).get('sparse')
714
- )
715
- else:
716
- self.session.datahub.get(vector_db_name).integration_handler.create_table(
717
- vector_table_name
718
- )
733
+ # Store sparse vector settings in params if specified
734
+ if is_sparse:
735
+ params = params or {}
736
+ params['vector_config'] = {
737
+ 'is_sparse': is_sparse
738
+ }
739
+ if vector_size is not None:
740
+ params['vector_config']['vector_size'] = vector_size
719
741
 
720
742
  kb = db.KnowledgeBase(
721
743
  name=name,
@@ -729,16 +751,15 @@ class KnowledgeBaseController:
729
751
  db.session.commit()
730
752
  return kb
731
753
 
732
- def _create_persistent_pgvector(self):
754
+ def _create_persistent_pgvector(self, params=None):
733
755
  """Create default vector database for knowledge base, if not specified"""
734
-
735
756
  vector_store_name = "kb_pgvector_store"
736
757
 
737
758
  # check if exists
738
759
  if self.session.integration_controller.get(vector_store_name):
739
760
  return vector_store_name
740
761
 
741
- self.session.integration_controller.add(vector_store_name, 'pgvector', {})
762
+ self.session.integration_controller.add(vector_store_name, 'pgvector', params or {})
742
763
  return vector_store_name
743
764
 
744
765
  def _create_persistent_chroma(self, kb_name, engine="chromadb"):
@@ -840,16 +861,19 @@ class KnowledgeBaseController:
840
861
  )
841
862
  return kb
842
863
 
843
- def get_table(self, name: str, project_id: int) -> KnowledgeBaseTable:
864
+ def get_table(self, name: str, project_id: int, params: dict = None) -> KnowledgeBaseTable:
844
865
  """
845
866
  Returns kb table object with properly configured preprocessing
846
867
  :param name: table name
847
868
  :param project_id: project id
869
+ :param params: runtime parameters for KB. Keys: 'model' - parameters for embedding model
848
870
  :return: kb table object
849
871
  """
850
872
  kb = self.get(name, project_id)
851
873
  if kb is not None:
852
874
  table = KnowledgeBaseTable(kb, self.session)
875
+ if params:
876
+ table.model_params = params.get('model')
853
877
 
854
878
  # Always configure preprocessing - either from params or default
855
879
  if kb.params and 'preprocessing' in kb.params:
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, List
2
2
 
3
3
  from sqlalchemy import null, func
4
4
 
@@ -41,9 +41,7 @@ def get_integration_record(name: str) -> db.Integration:
41
41
 
42
42
  @profiler.profile()
43
43
  def get_project_record(name: str) -> db.Project:
44
- company_id = ctx.company_id
45
- if company_id is None:
46
- company_id = null()
44
+ company_id = ctx.company_id if ctx.company_id is not None else 0
47
45
 
48
46
  project_record = (
49
47
  db.session.query(db.Project)
@@ -56,6 +54,19 @@ def get_project_record(name: str) -> db.Project:
56
54
  return project_record
57
55
 
58
56
 
57
+ @profiler.profile()
58
+ def get_project_records() -> List[db.Project]:
59
+ company_id = ctx.company_id if ctx.company_id is not None else 0
60
+
61
+ return (
62
+ db.session.query(db.Project)
63
+ .filter(
64
+ (db.Project.company_id == company_id)
65
+ & (db.Project.deleted_at == null())
66
+ ).all()
67
+ )
68
+
69
+
59
70
  @profiler.profile()
60
71
  def get_predictor_integration(record: db.Predictor) -> db.Integration:
61
72
  integration_record = (
@@ -7,14 +7,15 @@ from multiprocessing.pool import ThreadPool
7
7
  import pandas as pd
8
8
  from dateutil.parser import parse as parse_datetime
9
9
 
10
- from sqlalchemy import func, null
10
+ from sqlalchemy import func
11
11
  import numpy as np
12
12
 
13
13
  import mindsdb.interfaces.storage.db as db
14
14
  from mindsdb.utilities.config import Config
15
15
  from mindsdb.interfaces.model.functions import (
16
16
  get_model_record,
17
- get_model_records
17
+ get_model_records,
18
+ get_project_record
18
19
  )
19
20
  from mindsdb.interfaces.storage.json import get_json_storage
20
21
  from mindsdb.interfaces.storage.model_fs import ModelStorage
@@ -151,11 +152,7 @@ class ModelController():
151
152
  def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
152
153
  from mindsdb.interfaces.database.database import DatabaseController
153
154
 
154
- project_record = db.Project.query.filter(
155
- (func.lower(db.Project.name) == func.lower(project_name))
156
- & (db.Project.company_id == ctx.company_id)
157
- & (db.Project.deleted_at == null())
158
- ).first()
155
+ project_record = get_project_record(func.lower(project_name))
159
156
  if project_record is None:
160
157
  raise Exception(f"Project '{project_name}' does not exists")
161
158
 
@@ -1,4 +1,5 @@
1
1
  from typing import List
2
+ from textwrap import dedent
2
3
 
3
4
  from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
4
5
  from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
@@ -11,7 +12,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
11
12
 
12
13
  def get_tools(self, prefix='') -> List[BaseTool]:
13
14
  """Get the tools in the toolkit."""
14
- list_sql_database_tool = ListSQLDatabaseTool(name=f'sql_db_list_tables{prefix}', db=self.db)
15
+ list_sql_database_tool = ListSQLDatabaseTool(
16
+ name=f'sql_db_list_tables{prefix}',
17
+ db=self.db,
18
+ description=(
19
+ "Input is an empty string, output is a comma-separated list of tables in the database. "
20
+ "Each table name in the list may be in one of two formats: database_name.table_name or "
21
+ "database_name.schema_name.table_name."
22
+ )
23
+ )
15
24
 
16
25
  info_sql_database_tool_description = (
17
26
  "Input: A comma-separated list of tables. Output: Schema and sample rows for those tables. "
@@ -25,43 +34,43 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
25
34
  db=self.db, description=info_sql_database_tool_description
26
35
  )
27
36
 
28
- query_sql_database_tool_description = (
29
- "Input: A detailed SQL query. Output: Database result or error message. "
30
- "For errors, rewrite and retry the query. For 'Unknown column' errors, use "
31
- f"{info_sql_database_tool.name} to check table fields. "
32
- "This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases. "
33
- "Follow these instructions with utmost precision: "
34
- "1. Query Output Format: "
35
- " - Always return results in well-formatted **Markdown tables**. "
36
- " - Ensure clarity and proper structure for easy readability. "
37
- "2. Sample Data: "
38
- " - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers. "
39
- "3. Categorical Data: "
40
- " - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first. "
41
- " - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses. "
42
- "4. Result Limiting and Counting: "
43
- " - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`. "
44
- " - If the count is greater than 10, limit the query to return only 10 results initially. "
45
- " - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results. "
46
- " - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped. "
47
- "5. Date Handling: "
48
- " - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date. "
49
- " - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..` "
50
- " - Do not compare date values without casting columns to date. "
51
- " - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples: "
52
- " SELECT NOW() + INTERVAL 5 DAY; "
53
- " SELECT NOW() - INTERVAL 3 HOUR; "
54
- " SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY; "
55
- " SELECT NOW() - INTERVAL 1 YEAR; "
56
- "6. Query Best Practices: "
57
- " - Query only necessary columns, not all. "
58
- " - Use only existing column names from correct tables. "
59
- " - Use database-specific syntax for date operations. "
60
- "7. Error Handling: "
61
- " - For errors, rewrite and retry the query. "
62
- " - For 'Unknown column' errors, check table fields using info_sql_database_tool. "
63
- "Adhere to these guidelines for all queries and responses. Ask for clarification if needed."
64
- )
37
+ query_sql_database_tool_description = dedent(f"""\
38
+ Input: A detailed SQL query.
39
+ Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
40
+ This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
41
+ Follow these instructions with utmost precision:
42
+ 1. Query Output Format:
43
+ - Always return results in well-formatted **Markdown tables**.
44
+ - Ensure clarity and proper structure for easy readability.
45
+ 2. Sample Data:
46
+ - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
47
+ 3. Categorical Data:
48
+ - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first.
49
+ - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses.
50
+ 4. Result Limiting and Counting:
51
+ - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`.
52
+ - If the count is greater than 10, limit the query to return only 10 results initially.
53
+ - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
54
+ - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
55
+ 5. Date Handling:
56
+ - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
57
+ - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
58
+ - Do not compare date values without casting columns to date.
59
+ - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
60
+ SELECT NOW() + INTERVAL 5 DAY;
61
+ SELECT NOW() - INTERVAL 3 HOUR;
62
+ SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
63
+ SELECT NOW() - INTERVAL 1 YEAR;
64
+ 6. Query Best Practices:
65
+ - Always send only one query at a time.
66
+ - Query only necessary columns, not all.
67
+ - Use only existing column names from correct tables.
68
+ - Use database-specific syntax for date operations.
69
+ 7. Error Handling:
70
+ - For errors, rewrite and retry the query.
71
+ - For 'Unknown column' errors, check table fields using info_sql_database_tool.
72
+ Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
73
+ """)
65
74
 
66
75
  query_sql_database_tool = QuerySQLDataBaseTool(
67
76
  name=f'sql_db_query{prefix}',
@@ -43,10 +43,17 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
43
43
  raise ValueError(f"Knowledge base not found: {kb_name}")
44
44
 
45
45
  kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
46
+ vector_store_config = {
47
+ 'kb_table': kb_table
48
+ }
49
+ is_sparse = tools_config.pop('is_sparse', None)
50
+ vector_size = tools_config.pop('vector_size', None)
51
+ if is_sparse is not None:
52
+ vector_store_config['is_sparse'] = is_sparse
53
+ if vector_size is not None:
54
+ vector_store_config['vector_size'] = vector_size
46
55
  kb_params = {
47
- 'vector_store_config': {
48
- 'kb_table': kb_table
49
- }
56
+ 'vector_store_config': vector_store_config
50
57
  }
51
58
 
52
59
  # Get embedding model from knowledge base table