MindsDB 25.1.2.1__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +244 -242
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +76 -67
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +48 -16
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +36 -14
- mindsdb/integrations/utilities/rag/settings.py +8 -2
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +39 -15
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -53,15 +53,23 @@ class ChatBotTask(BaseTask):
|
|
|
53
53
|
|
|
54
54
|
chat_params = self.chat_handler.get_chat_config()
|
|
55
55
|
polling = chat_params['polling']['type']
|
|
56
|
+
|
|
57
|
+
memory = chat_params['memory']['type'] if 'memory' in chat_params else None
|
|
58
|
+
memory_cls = None
|
|
59
|
+
if memory:
|
|
60
|
+
memory_cls = DBMemory if memory == 'db' else HandlerMemory
|
|
61
|
+
|
|
56
62
|
if polling == 'message_count':
|
|
57
63
|
chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
|
|
58
64
|
self.chat_pooling = MessageCountPolling(self, chat_params)
|
|
59
|
-
|
|
65
|
+
# The default type for message count polling is HandlerMemory if not specified.
|
|
66
|
+
self.memory = HandlerMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
|
|
60
67
|
|
|
61
68
|
elif polling == 'realtime':
|
|
62
69
|
chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params]
|
|
63
70
|
self.chat_pooling = RealtimePolling(self, chat_params)
|
|
64
|
-
|
|
71
|
+
# The default type for real-time polling is DBMemory if not specified.
|
|
72
|
+
self.memory = DBMemory(self, chat_params) if memory_cls is None else memory_cls(self, chat_params)
|
|
65
73
|
|
|
66
74
|
elif polling == 'webhook':
|
|
67
75
|
self.chat_pooling = WebhookPolling(self, chat_params)
|
|
@@ -80,11 +88,11 @@ class ChatBotTask(BaseTask):
|
|
|
80
88
|
self.chat_pooling.run(stop_event)
|
|
81
89
|
|
|
82
90
|
def on_message(self, message: ChatBotMessage, chat_id=None, chat_memory=None, table_name=None):
|
|
83
|
-
if not chat_id and chat_memory:
|
|
91
|
+
if not chat_id and not chat_memory:
|
|
84
92
|
raise Exception('chat_id or chat_memory should be provided')
|
|
85
93
|
|
|
86
94
|
try:
|
|
87
|
-
self._on_holding_message(chat_id, table_name)
|
|
95
|
+
self._on_holding_message(chat_id, chat_memory, table_name)
|
|
88
96
|
self._on_message(message, chat_id, chat_memory, table_name)
|
|
89
97
|
except (SystemExit, KeyboardInterrupt):
|
|
90
98
|
raise
|
|
@@ -93,15 +101,18 @@ class ChatBotTask(BaseTask):
|
|
|
93
101
|
logger.error(error)
|
|
94
102
|
self.set_error(str(error))
|
|
95
103
|
|
|
96
|
-
def _on_holding_message(self, chat_id: str, table_name: str = None):
|
|
104
|
+
def _on_holding_message(self, chat_id: str = None, chat_memory: BaseMemory = None, table_name: str = None):
|
|
97
105
|
"""
|
|
98
106
|
Send a message to hold the user's attention while the bot is processing the request.
|
|
99
107
|
This message will not be saved in the chat memory.
|
|
100
108
|
|
|
101
109
|
Args:
|
|
102
110
|
chat_id (str): The ID of the chat.
|
|
111
|
+
chat_memory (BaseMemory): The memory of the chat.
|
|
103
112
|
table_name (str): The name of the table.
|
|
104
113
|
"""
|
|
114
|
+
chat_id = chat_id if chat_id else chat_memory.chat_id
|
|
115
|
+
|
|
105
116
|
response_message = ChatBotMessage(
|
|
106
117
|
ChatBotMessage.Type.DIRECT,
|
|
107
118
|
HOLDING_MESSAGE,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
2
4
|
from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, OrderBy
|
|
3
5
|
|
|
4
6
|
from mindsdb.interfaces.storage import db
|
|
5
|
-
|
|
6
|
-
|
|
7
7
|
from .types import ChatBotMessage
|
|
8
8
|
|
|
9
9
|
|
|
@@ -60,7 +60,7 @@ class BaseMemory:
|
|
|
60
60
|
|
|
61
61
|
# If the chat_id is a tuple, convert it to a string when storing the message in the database.
|
|
62
62
|
self._add_to_history(
|
|
63
|
-
|
|
63
|
+
chat_id,
|
|
64
64
|
chat_message,
|
|
65
65
|
table_name=table_name
|
|
66
66
|
)
|
|
@@ -74,7 +74,7 @@ class BaseMemory:
|
|
|
74
74
|
|
|
75
75
|
else:
|
|
76
76
|
history = self._get_chat_history(
|
|
77
|
-
|
|
77
|
+
chat_id,
|
|
78
78
|
table_name
|
|
79
79
|
)
|
|
80
80
|
self._cache[key] = history
|
|
@@ -108,18 +108,44 @@ class HandlerMemory(BaseMemory):
|
|
|
108
108
|
time_col = t_params['time_col']
|
|
109
109
|
chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']]
|
|
110
110
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
from_table=Identifier(t_params['name']),
|
|
116
|
-
where=[BinaryOperation(
|
|
111
|
+
chat_id = chat_id if isinstance(chat_id, tuple) else (chat_id,)
|
|
112
|
+
# Add a WHERE clause for each chat_id column.
|
|
113
|
+
where_conditions = [
|
|
114
|
+
BinaryOperation(
|
|
117
115
|
op='=',
|
|
118
116
|
args=[
|
|
119
117
|
Identifier(chat_id_col),
|
|
120
118
|
Constant(chat_id[idx])
|
|
121
119
|
]
|
|
122
|
-
) for idx, chat_id_col in enumerate(chat_id_cols)
|
|
120
|
+
) for idx, chat_id_col in enumerate(chat_id_cols)
|
|
121
|
+
]
|
|
122
|
+
# Add a WHERE clause to ignore holding messages from the bot.
|
|
123
|
+
from .chatbot_task import HOLDING_MESSAGE
|
|
124
|
+
|
|
125
|
+
where_conditions.append(
|
|
126
|
+
BinaryOperation(
|
|
127
|
+
op='!=',
|
|
128
|
+
args=[
|
|
129
|
+
Identifier(text_col),
|
|
130
|
+
Constant(HOLDING_MESSAGE)
|
|
131
|
+
]
|
|
132
|
+
)
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Convert the WHERE conditions to a BinaryOperation object.
|
|
136
|
+
where_conditions_binary_operation = None
|
|
137
|
+
for condition in where_conditions:
|
|
138
|
+
if where_conditions_binary_operation is None:
|
|
139
|
+
where_conditions_binary_operation = condition
|
|
140
|
+
else:
|
|
141
|
+
where_conditions_binary_operation = BinaryOperation('and', args=[where_conditions_binary_operation, condition])
|
|
142
|
+
|
|
143
|
+
ast_query = Select(
|
|
144
|
+
targets=[Identifier(text_col),
|
|
145
|
+
Identifier(username_col),
|
|
146
|
+
Identifier(time_col)],
|
|
147
|
+
from_table=Identifier(t_params['name']),
|
|
148
|
+
where=where_conditions_binary_operation,
|
|
123
149
|
order_by=[OrderBy(Identifier(time_col))],
|
|
124
150
|
limit=Constant(self.MAX_DEPTH),
|
|
125
151
|
)
|
|
@@ -151,9 +177,28 @@ class DBMemory(BaseMemory):
|
|
|
151
177
|
uses mindsdb database to store messages
|
|
152
178
|
'''
|
|
153
179
|
|
|
180
|
+
def _generate_chat_id_for_db(self, chat_id: Union[str, tuple], table_name: str = None) -> str:
    """
    Generate an ID for the chat to store in the database.
    The ID is a string made of the table name (if provided) followed by the
    components of the chat ID, all separated by underscores.

    Args:
        chat_id (str | tuple): The ID of the chat. Tuple components are
            converted with str() and joined with underscores.
        table_name (str): The name of the table the chat belongs to.

    Returns:
        str: "<table_name>_<chat id>" when table_name is truthy, otherwise
            just the chat id string.
    """
    if isinstance(chat_id, tuple):
        chat_id_str = "_".join(str(val) for val in chat_id)
    else:
        chat_id_str = str(chat_id)

    # table_name is optional (callers default it to None). Fall back to the
    # bare chat id in that case; the earlier version bound the result only
    # inside this branch and raised UnboundLocalError without a table name.
    if table_name:
        chat_id_str = f"{table_name}_{chat_id_str}"

    return chat_id_str
|
|
198
|
+
|
|
154
199
|
def _add_to_history(self, chat_id, message, table_name=None):
|
|
155
200
|
chat_bot_id = self.chat_task.bot_id
|
|
156
|
-
destination =
|
|
201
|
+
destination = self._generate_chat_id_for_db(chat_id, table_name)
|
|
157
202
|
|
|
158
203
|
message = db.ChatBotsHistory(
|
|
159
204
|
chat_bot_id=chat_bot_id,
|
|
@@ -167,7 +212,7 @@ class DBMemory(BaseMemory):
|
|
|
167
212
|
|
|
168
213
|
def _get_chat_history(self, chat_id, table_name=None):
|
|
169
214
|
chat_bot_id = self.chat_task.bot_id
|
|
170
|
-
destination =
|
|
215
|
+
destination = self._generate_chat_id_for_db(chat_id, table_name)
|
|
171
216
|
|
|
172
217
|
query = db.ChatBotsHistory.query\
|
|
173
218
|
.filter(
|
|
@@ -24,19 +24,14 @@ class Project:
|
|
|
24
24
|
p = Project()
|
|
25
25
|
p.record = db_record
|
|
26
26
|
p.name = db_record.name
|
|
27
|
-
p.company_id =
|
|
27
|
+
p.company_id = ctx.company_id
|
|
28
28
|
p.id = db_record.id
|
|
29
29
|
return p
|
|
30
30
|
|
|
31
31
|
def create(self, name: str):
|
|
32
32
|
name = name.lower()
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
& (db.Project.company_id == ctx.company_id)
|
|
36
|
-
& (db.Project.deleted_at == sa.null())
|
|
37
|
-
).first()
|
|
38
|
-
if existing_record is not None:
|
|
39
|
-
raise EntityExistsError('Project already exists', name)
|
|
33
|
+
|
|
34
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
40
35
|
|
|
41
36
|
existing_record = db.Integration.query.filter(
|
|
42
37
|
sa.func.lower(db.Integration.name) == name,
|
|
@@ -45,23 +40,28 @@ class Project:
|
|
|
45
40
|
if existing_record is not None:
|
|
46
41
|
raise EntityExistsError('Database exists with this name ', name)
|
|
47
42
|
|
|
43
|
+
existing_record = db.Project.query.filter(
|
|
44
|
+
(sa.func.lower(db.Project.name) == name)
|
|
45
|
+
& (db.Project.company_id == company_id)
|
|
46
|
+
& (db.Project.deleted_at == sa.null())
|
|
47
|
+
).first()
|
|
48
|
+
if existing_record is not None:
|
|
49
|
+
raise EntityExistsError('Project already exists', name)
|
|
50
|
+
|
|
48
51
|
record = db.Project(
|
|
49
52
|
name=name,
|
|
50
|
-
company_id=
|
|
53
|
+
company_id=company_id
|
|
51
54
|
)
|
|
52
55
|
|
|
53
56
|
self.record = record
|
|
54
57
|
self.name = name
|
|
55
|
-
self.company_id =
|
|
58
|
+
self.company_id = company_id
|
|
56
59
|
|
|
57
60
|
db.session.add(record)
|
|
58
61
|
db.session.commit()
|
|
59
62
|
|
|
60
63
|
self.id = record.id
|
|
61
64
|
|
|
62
|
-
def save(self):
|
|
63
|
-
db.session.commit()
|
|
64
|
-
|
|
65
65
|
def delete(self):
|
|
66
66
|
tables = self.get_tables()
|
|
67
67
|
tables = [key for key, val in tables.items() if val['type'] != 'table']
|
|
@@ -360,8 +360,9 @@ class ProjectController:
|
|
|
360
360
|
pass
|
|
361
361
|
|
|
362
362
|
def get_list(self) -> List[Project]:
|
|
363
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
363
364
|
records = db.Project.query.filter(
|
|
364
|
-
(db.Project.company_id ==
|
|
365
|
+
(db.Project.company_id == company_id)
|
|
365
366
|
& (db.Project.deleted_at == sa.null())
|
|
366
367
|
).order_by(db.Project.name)
|
|
367
368
|
|
|
@@ -371,7 +372,8 @@ class ProjectController:
|
|
|
371
372
|
if id is not None and name is not None:
|
|
372
373
|
raise ValueError("Both 'id' and 'name' is None")
|
|
373
374
|
|
|
374
|
-
|
|
375
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
376
|
+
q = db.Project.query.filter_by(company_id=company_id)
|
|
375
377
|
|
|
376
378
|
if id is not None:
|
|
377
379
|
q = q.filter_by(id=id)
|
|
@@ -3,6 +3,7 @@ from mindsdb.interfaces.storage import db
|
|
|
3
3
|
from mindsdb.interfaces.query_context.context_controller import query_context_controller
|
|
4
4
|
from mindsdb.utilities.context import context as ctx
|
|
5
5
|
from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
|
|
6
|
+
from mindsdb.interfaces.model.functions import get_project_record, get_project_records
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class ViewController:
|
|
@@ -39,11 +40,8 @@ class ViewController:
|
|
|
39
40
|
|
|
40
41
|
def update(self, name, query, project_name):
|
|
41
42
|
name = name.lower()
|
|
42
|
-
project_record =
|
|
43
|
-
|
|
44
|
-
company_id=ctx.company_id,
|
|
45
|
-
deleted_at=None
|
|
46
|
-
).first()
|
|
43
|
+
project_record = get_project_record(project_name)
|
|
44
|
+
|
|
47
45
|
rec = db.session.query(db.View).filter(
|
|
48
46
|
func.lower(db.View.name) == name,
|
|
49
47
|
db.View.company_id == ctx.company_id,
|
|
@@ -56,11 +54,8 @@ class ViewController:
|
|
|
56
54
|
|
|
57
55
|
def delete(self, name, project_name):
|
|
58
56
|
name = name.lower()
|
|
59
|
-
project_record =
|
|
60
|
-
|
|
61
|
-
company_id=ctx.company_id,
|
|
62
|
-
deleted_at=None
|
|
63
|
-
).first()
|
|
57
|
+
project_record = get_project_record(project_name)
|
|
58
|
+
|
|
64
59
|
rec = db.session.query(db.View).filter(
|
|
65
60
|
func.lower(db.View.name) == name,
|
|
66
61
|
db.View.company_id == ctx.company_id,
|
|
@@ -74,17 +69,12 @@ class ViewController:
|
|
|
74
69
|
query_context_controller.drop_query_context('view', rec.id)
|
|
75
70
|
|
|
76
71
|
def list(self, project_name):
|
|
77
|
-
query = db.session.query(db.Project).filter_by(
|
|
78
|
-
company_id=ctx.company_id,
|
|
79
|
-
deleted_at=None
|
|
80
|
-
)
|
|
81
|
-
if project_name is not None:
|
|
82
|
-
query = query.filter_by(name=project_name)
|
|
83
72
|
|
|
84
|
-
project_names = {
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
73
|
+
project_names = {}
|
|
74
|
+
for project in get_project_records():
|
|
75
|
+
if project_name is not None and project.name != project_name:
|
|
76
|
+
continue
|
|
77
|
+
project_names[project.id] = project.name
|
|
88
78
|
|
|
89
79
|
query = db.session.query(db.View).filter(
|
|
90
80
|
db.View.company_id == ctx.company_id,
|
|
@@ -112,11 +102,8 @@ class ViewController:
|
|
|
112
102
|
}
|
|
113
103
|
|
|
114
104
|
def get(self, id=None, name=None, project_name=None):
|
|
115
|
-
project_record =
|
|
116
|
-
|
|
117
|
-
company_id=ctx.company_id,
|
|
118
|
-
deleted_at=None
|
|
119
|
-
).first()
|
|
105
|
+
project_record = get_project_record(project_name)
|
|
106
|
+
|
|
120
107
|
if id is not None:
|
|
121
108
|
records = db.session.query(db.View).filter_by(
|
|
122
109
|
id=id,
|
|
@@ -52,6 +52,7 @@ class KnowledgeBaseTable:
|
|
|
52
52
|
self.session = session
|
|
53
53
|
self.document_preprocessor = None
|
|
54
54
|
self.document_loader = None
|
|
55
|
+
self.model_params = None
|
|
55
56
|
|
|
56
57
|
def configure_preprocessing(self, config: Optional[dict] = None):
|
|
57
58
|
"""Configure preprocessing for the knowledge base table"""
|
|
@@ -488,6 +489,7 @@ class KnowledgeBaseTable:
|
|
|
488
489
|
df_out = project_datanode.predict(
|
|
489
490
|
model_name=model_rec.name,
|
|
490
491
|
df=df,
|
|
492
|
+
params=self.model_params
|
|
491
493
|
)
|
|
492
494
|
|
|
493
495
|
target = model_rec.to_predict[0]
|
|
@@ -642,11 +644,13 @@ class KnowledgeBaseController:
|
|
|
642
644
|
storage: Identifier,
|
|
643
645
|
params: dict,
|
|
644
646
|
preprocessing_config: Optional[dict] = None,
|
|
645
|
-
if_not_exists: bool = False
|
|
647
|
+
if_not_exists: bool = False
|
|
646
648
|
) -> db.KnowledgeBase:
|
|
647
649
|
"""
|
|
648
650
|
Add a new knowledge base to the database
|
|
649
651
|
:param preprocessing_config: Optional preprocessing configuration to validate and store
|
|
652
|
+
:param is_sparse: Whether to use sparse vectors for embeddings
|
|
653
|
+
:param vector_size: Optional size specification for vectors, required when is_sparse=True
|
|
650
654
|
"""
|
|
651
655
|
# Validate preprocessing config first if provided
|
|
652
656
|
if preprocessing_config is not None:
|
|
@@ -654,6 +658,12 @@ class KnowledgeBaseController:
|
|
|
654
658
|
params = params or {}
|
|
655
659
|
params['preprocessing'] = preprocessing_config
|
|
656
660
|
|
|
661
|
+
# Check if vector_size is provided when using sparse vectors
|
|
662
|
+
is_sparse = params.get('is_sparse')
|
|
663
|
+
vector_size = params.get('vector_size')
|
|
664
|
+
if is_sparse and vector_size is None:
|
|
665
|
+
raise ValueError("vector_size is required when is_sparse=True")
|
|
666
|
+
|
|
657
667
|
# get project id
|
|
658
668
|
project = self.session.database_controller.get_project(project_name)
|
|
659
669
|
project_id = project.id
|
|
@@ -693,7 +703,20 @@ class KnowledgeBaseController:
|
|
|
693
703
|
cloud_pg_vector = os.environ.get('KB_PGVECTOR_URL')
|
|
694
704
|
if cloud_pg_vector:
|
|
695
705
|
vector_table_name = name
|
|
696
|
-
|
|
706
|
+
# Add sparse vector support for pgvector
|
|
707
|
+
vector_db_params = {}
|
|
708
|
+
# Check both explicit parameter and model configuration
|
|
709
|
+
is_sparse = is_sparse or model_record.learn_args.get('using', {}).get('sparse')
|
|
710
|
+
if is_sparse:
|
|
711
|
+
vector_db_params['is_sparse'] = True
|
|
712
|
+
if vector_size is not None:
|
|
713
|
+
vector_db_params['vector_size'] = vector_size
|
|
714
|
+
vector_db_name = self._create_persistent_pgvector(vector_db_params)
|
|
715
|
+
|
|
716
|
+
# create table in vectordb before creating KB
|
|
717
|
+
self.session.datahub.get(vector_db_name).integration_handler.create_table(
|
|
718
|
+
vector_table_name
|
|
719
|
+
)
|
|
697
720
|
else:
|
|
698
721
|
# create chroma db with same name
|
|
699
722
|
vector_table_name = "default_collection"
|
|
@@ -707,15 +730,14 @@ class KnowledgeBaseController:
|
|
|
707
730
|
|
|
708
731
|
vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
|
|
709
732
|
|
|
710
|
-
#
|
|
711
|
-
if
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
)
|
|
733
|
+
# Store sparse vector settings in params if specified
|
|
734
|
+
if is_sparse:
|
|
735
|
+
params = params or {}
|
|
736
|
+
params['vector_config'] = {
|
|
737
|
+
'is_sparse': is_sparse
|
|
738
|
+
}
|
|
739
|
+
if vector_size is not None:
|
|
740
|
+
params['vector_config']['vector_size'] = vector_size
|
|
719
741
|
|
|
720
742
|
kb = db.KnowledgeBase(
|
|
721
743
|
name=name,
|
|
@@ -729,16 +751,15 @@ class KnowledgeBaseController:
|
|
|
729
751
|
db.session.commit()
|
|
730
752
|
return kb
|
|
731
753
|
|
|
732
|
-
def _create_persistent_pgvector(self):
|
|
754
|
+
def _create_persistent_pgvector(self, params=None):
|
|
733
755
|
"""Create default vector database for knowledge base, if not specified"""
|
|
734
|
-
|
|
735
756
|
vector_store_name = "kb_pgvector_store"
|
|
736
757
|
|
|
737
758
|
# check if exists
|
|
738
759
|
if self.session.integration_controller.get(vector_store_name):
|
|
739
760
|
return vector_store_name
|
|
740
761
|
|
|
741
|
-
self.session.integration_controller.add(vector_store_name, 'pgvector', {})
|
|
762
|
+
self.session.integration_controller.add(vector_store_name, 'pgvector', params or {})
|
|
742
763
|
return vector_store_name
|
|
743
764
|
|
|
744
765
|
def _create_persistent_chroma(self, kb_name, engine="chromadb"):
|
|
@@ -840,16 +861,19 @@ class KnowledgeBaseController:
|
|
|
840
861
|
)
|
|
841
862
|
return kb
|
|
842
863
|
|
|
843
|
-
def get_table(self, name: str, project_id: int) -> KnowledgeBaseTable:
|
|
864
|
+
def get_table(self, name: str, project_id: int, params: dict = None) -> KnowledgeBaseTable:
|
|
844
865
|
"""
|
|
845
866
|
Returns kb table object with properly configured preprocessing
|
|
846
867
|
:param name: table name
|
|
847
868
|
:param project_id: project id
|
|
869
|
+
:param params: runtime parameters for KB. Keys: 'model' - parameters for embedding model
|
|
848
870
|
:return: kb table object
|
|
849
871
|
"""
|
|
850
872
|
kb = self.get(name, project_id)
|
|
851
873
|
if kb is not None:
|
|
852
874
|
table = KnowledgeBaseTable(kb, self.session)
|
|
875
|
+
if params:
|
|
876
|
+
table.model_params = params.get('model')
|
|
853
877
|
|
|
854
878
|
# Always configure preprocessing - either from params or default
|
|
855
879
|
if kb.params and 'preprocessing' in kb.params:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Optional, List
|
|
2
2
|
|
|
3
3
|
from sqlalchemy import null, func
|
|
4
4
|
|
|
@@ -41,9 +41,7 @@ def get_integration_record(name: str) -> db.Integration:
|
|
|
41
41
|
|
|
42
42
|
@profiler.profile()
|
|
43
43
|
def get_project_record(name: str) -> db.Project:
|
|
44
|
-
company_id = ctx.company_id
|
|
45
|
-
if company_id is None:
|
|
46
|
-
company_id = null()
|
|
44
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
47
45
|
|
|
48
46
|
project_record = (
|
|
49
47
|
db.session.query(db.Project)
|
|
@@ -56,6 +54,19 @@ def get_project_record(name: str) -> db.Project:
|
|
|
56
54
|
return project_record
|
|
57
55
|
|
|
58
56
|
|
|
57
|
+
@profiler.profile()
|
|
58
|
+
def get_project_records() -> List[db.Project]:
|
|
59
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
60
|
+
|
|
61
|
+
return (
|
|
62
|
+
db.session.query(db.Project)
|
|
63
|
+
.filter(
|
|
64
|
+
(db.Project.company_id == company_id)
|
|
65
|
+
& (db.Project.deleted_at == null())
|
|
66
|
+
).all()
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
|
|
59
70
|
@profiler.profile()
|
|
60
71
|
def get_predictor_integration(record: db.Predictor) -> db.Integration:
|
|
61
72
|
integration_record = (
|
|
@@ -7,14 +7,15 @@ from multiprocessing.pool import ThreadPool
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from dateutil.parser import parse as parse_datetime
|
|
9
9
|
|
|
10
|
-
from sqlalchemy import func
|
|
10
|
+
from sqlalchemy import func
|
|
11
11
|
import numpy as np
|
|
12
12
|
|
|
13
13
|
import mindsdb.interfaces.storage.db as db
|
|
14
14
|
from mindsdb.utilities.config import Config
|
|
15
15
|
from mindsdb.interfaces.model.functions import (
|
|
16
16
|
get_model_record,
|
|
17
|
-
get_model_records
|
|
17
|
+
get_model_records,
|
|
18
|
+
get_project_record
|
|
18
19
|
)
|
|
19
20
|
from mindsdb.interfaces.storage.json import get_json_storage
|
|
20
21
|
from mindsdb.interfaces.storage.model_fs import ModelStorage
|
|
@@ -151,11 +152,7 @@ class ModelController():
|
|
|
151
152
|
def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
|
|
152
153
|
from mindsdb.interfaces.database.database import DatabaseController
|
|
153
154
|
|
|
154
|
-
project_record =
|
|
155
|
-
(func.lower(db.Project.name) == func.lower(project_name))
|
|
156
|
-
& (db.Project.company_id == ctx.company_id)
|
|
157
|
-
& (db.Project.deleted_at == null())
|
|
158
|
-
).first()
|
|
155
|
+
project_record = get_project_record(func.lower(project_name))
|
|
159
156
|
if project_record is None:
|
|
160
157
|
raise Exception(f"Project '{project_name}' does not exists")
|
|
161
158
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from typing import List
|
|
2
|
+
from textwrap import dedent
|
|
2
3
|
|
|
3
4
|
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
|
4
5
|
from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
|
|
@@ -11,7 +12,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
11
12
|
|
|
12
13
|
def get_tools(self, prefix='') -> List[BaseTool]:
|
|
13
14
|
"""Get the tools in the toolkit."""
|
|
14
|
-
list_sql_database_tool = ListSQLDatabaseTool(
|
|
15
|
+
list_sql_database_tool = ListSQLDatabaseTool(
|
|
16
|
+
name=f'sql_db_list_tables{prefix}',
|
|
17
|
+
db=self.db,
|
|
18
|
+
description=(
|
|
19
|
+
"Input is an empty string, output is a comma-separated list of tables in the database. "
|
|
20
|
+
"Each table name in the list may be in one of two formats: database_name.table_name or "
|
|
21
|
+
"database_name.schema_name.table_name."
|
|
22
|
+
)
|
|
23
|
+
)
|
|
15
24
|
|
|
16
25
|
info_sql_database_tool_description = (
|
|
17
26
|
"Input: A comma-separated list of tables. Output: Schema and sample rows for those tables. "
|
|
@@ -25,43 +34,43 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
25
34
|
db=self.db, description=info_sql_database_tool_description
|
|
26
35
|
)
|
|
27
36
|
|
|
28
|
-
query_sql_database_tool_description = (
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
)
|
|
37
|
+
query_sql_database_tool_description = dedent(f"""\
|
|
38
|
+
Input: A detailed SQL query.
|
|
39
|
+
Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
|
|
40
|
+
This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
|
|
41
|
+
Follow these instructions with utmost precision:
|
|
42
|
+
1. Query Output Format:
|
|
43
|
+
- Always return results in well-formatted **Markdown tables**.
|
|
44
|
+
- Ensure clarity and proper structure for easy readability.
|
|
45
|
+
2. Sample Data:
|
|
46
|
+
- Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
|
|
47
|
+
3. Categorical Data:
|
|
48
|
+
- Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first.
|
|
49
|
+
- Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses.
|
|
50
|
+
4. Result Limiting and Counting:
|
|
51
|
+
- Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`.
|
|
52
|
+
- If the count is greater than 10, limit the query to return only 10 results initially.
|
|
53
|
+
- **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
|
|
54
|
+
- Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
|
|
55
|
+
5. Date Handling:
|
|
56
|
+
- **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
|
|
57
|
+
- For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
|
|
58
|
+
- Do not compare date values without casting columns to date.
|
|
59
|
+
- For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
|
|
60
|
+
SELECT NOW() + INTERVAL 5 DAY;
|
|
61
|
+
SELECT NOW() - INTERVAL 3 HOUR;
|
|
62
|
+
SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
|
|
63
|
+
SELECT NOW() - INTERVAL 1 YEAR;
|
|
64
|
+
6. Query Best Practices:
|
|
65
|
+
- Always send only one query at a time.
|
|
66
|
+
- Query only necessary columns, not all.
|
|
67
|
+
- Use only existing column names from correct tables.
|
|
68
|
+
- Use database-specific syntax for date operations.
|
|
69
|
+
7. Error Handling:
|
|
70
|
+
- For errors, rewrite and retry the query.
|
|
71
|
+
- For 'Unknown column' errors, check table fields using info_sql_database_tool.
|
|
72
|
+
Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
|
|
73
|
+
""")
|
|
65
74
|
|
|
66
75
|
query_sql_database_tool = QuerySQLDataBaseTool(
|
|
67
76
|
name=f'sql_db_query{prefix}',
|
|
@@ -43,10 +43,17 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
|
43
43
|
raise ValueError(f"Knowledge base not found: {kb_name}")
|
|
44
44
|
|
|
45
45
|
kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
|
|
46
|
+
vector_store_config = {
|
|
47
|
+
'kb_table': kb_table
|
|
48
|
+
}
|
|
49
|
+
is_sparse = tools_config.pop('is_sparse', None)
|
|
50
|
+
vector_size = tools_config.pop('vector_size', None)
|
|
51
|
+
if is_sparse is not None:
|
|
52
|
+
vector_store_config['is_sparse'] = is_sparse
|
|
53
|
+
if vector_size is not None:
|
|
54
|
+
vector_store_config['vector_size'] = vector_size
|
|
46
55
|
kb_params = {
|
|
47
|
-
'vector_store_config':
|
|
48
|
-
'kb_table': kb_table
|
|
49
|
-
}
|
|
56
|
+
'vector_store_config': vector_store_config
|
|
50
57
|
}
|
|
51
58
|
|
|
52
59
|
# Get embedding model from knowledge base table
|