MindsDB 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/METADATA +246 -255
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/RECORD +94 -83
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +8 -3
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +9 -26
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/agents.py +3 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +50 -16
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +2 -2
- mindsdb/integrations/utilities/files/__init__.py +0 -0
- mindsdb/integrations/utilities/files/file_reader.py +258 -0
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
- mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +74 -21
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +108 -78
- mindsdb/integrations/utilities/rag/settings.py +37 -16
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +18 -8
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/langchain_agent.py +124 -157
- mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +21 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/integrations.py +5 -1
- mindsdb/interfaces/database/projects.py +55 -16
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +39 -15
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +51 -40
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -54
- mindsdb/interfaces/skills/skills_controller.py +7 -3
- mindsdb/interfaces/skills/sql_agent.py +127 -41
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +279 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
2
4
|
from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, OrderBy
|
|
3
5
|
|
|
4
6
|
from mindsdb.interfaces.storage import db
|
|
5
|
-
|
|
6
|
-
|
|
7
7
|
from .types import ChatBotMessage
|
|
8
8
|
|
|
9
9
|
|
|
@@ -60,7 +60,7 @@ class BaseMemory:
|
|
|
60
60
|
|
|
61
61
|
# If the chat_id is a tuple, convert it to a string when storing the message in the database.
|
|
62
62
|
self._add_to_history(
|
|
63
|
-
|
|
63
|
+
chat_id,
|
|
64
64
|
chat_message,
|
|
65
65
|
table_name=table_name
|
|
66
66
|
)
|
|
@@ -74,7 +74,7 @@ class BaseMemory:
|
|
|
74
74
|
|
|
75
75
|
else:
|
|
76
76
|
history = self._get_chat_history(
|
|
77
|
-
|
|
77
|
+
chat_id,
|
|
78
78
|
table_name
|
|
79
79
|
)
|
|
80
80
|
self._cache[key] = history
|
|
@@ -108,18 +108,44 @@ class HandlerMemory(BaseMemory):
|
|
|
108
108
|
time_col = t_params['time_col']
|
|
109
109
|
chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']]
|
|
110
110
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
from_table=Identifier(t_params['name']),
|
|
116
|
-
where=[BinaryOperation(
|
|
111
|
+
chat_id = chat_id if isinstance(chat_id, tuple) else (chat_id,)
|
|
112
|
+
# Add a WHERE clause for each chat_id column.
|
|
113
|
+
where_conditions = [
|
|
114
|
+
BinaryOperation(
|
|
117
115
|
op='=',
|
|
118
116
|
args=[
|
|
119
117
|
Identifier(chat_id_col),
|
|
120
118
|
Constant(chat_id[idx])
|
|
121
119
|
]
|
|
122
|
-
) for idx, chat_id_col in enumerate(chat_id_cols)
|
|
120
|
+
) for idx, chat_id_col in enumerate(chat_id_cols)
|
|
121
|
+
]
|
|
122
|
+
# Add a WHERE clause to ignore holding messages from the bot.
|
|
123
|
+
from .chatbot_task import HOLDING_MESSAGE
|
|
124
|
+
|
|
125
|
+
where_conditions.append(
|
|
126
|
+
BinaryOperation(
|
|
127
|
+
op='!=',
|
|
128
|
+
args=[
|
|
129
|
+
Identifier(text_col),
|
|
130
|
+
Constant(HOLDING_MESSAGE)
|
|
131
|
+
]
|
|
132
|
+
)
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Convert the WHERE conditions to a BinaryOperation object.
|
|
136
|
+
where_conditions_binary_operation = None
|
|
137
|
+
for condition in where_conditions:
|
|
138
|
+
if where_conditions_binary_operation is None:
|
|
139
|
+
where_conditions_binary_operation = condition
|
|
140
|
+
else:
|
|
141
|
+
where_conditions_binary_operation = BinaryOperation('and', args=[where_conditions_binary_operation, condition])
|
|
142
|
+
|
|
143
|
+
ast_query = Select(
|
|
144
|
+
targets=[Identifier(text_col),
|
|
145
|
+
Identifier(username_col),
|
|
146
|
+
Identifier(time_col)],
|
|
147
|
+
from_table=Identifier(t_params['name']),
|
|
148
|
+
where=where_conditions_binary_operation,
|
|
123
149
|
order_by=[OrderBy(Identifier(time_col))],
|
|
124
150
|
limit=Constant(self.MAX_DEPTH),
|
|
125
151
|
)
|
|
@@ -151,9 +177,28 @@ class DBMemory(BaseMemory):
|
|
|
151
177
|
uses mindsdb database to store messages
|
|
152
178
|
'''
|
|
153
179
|
|
|
180
|
+
def _generate_chat_id_for_db(self, chat_id: Union[str, tuple], table_name: str = None) -> str:
    """
    Generate an ID for the chat to store in the database.
    The ID is a string that includes the components of the chat ID and the table name (if provided) separated by underscores.

    Args:
        chat_id (str | tuple): The ID of the chat.
        table_name (str): The name of the table the chat belongs to.

    Returns:
        str: "<table_name>_<chat id parts>" when table_name is provided, otherwise only the chat id parts.
    """
    if isinstance(chat_id, tuple):
        chat_id_str = "_".join(str(val) for val in chat_id)
    else:
        chat_id_str = str(chat_id)

    # Prefix only when a table name is supplied. The result variable is always bound,
    # so calling without a table name returns the plain chat id instead of failing.
    if table_name:
        chat_id_str = f"{table_name}_{chat_id_str}"

    return chat_id_str
|
|
198
|
+
|
|
154
199
|
def _add_to_history(self, chat_id, message, table_name=None):
|
|
155
200
|
chat_bot_id = self.chat_task.bot_id
|
|
156
|
-
destination =
|
|
201
|
+
destination = self._generate_chat_id_for_db(chat_id, table_name)
|
|
157
202
|
|
|
158
203
|
message = db.ChatBotsHistory(
|
|
159
204
|
chat_bot_id=chat_bot_id,
|
|
@@ -167,7 +212,7 @@ class DBMemory(BaseMemory):
|
|
|
167
212
|
|
|
168
213
|
def _get_chat_history(self, chat_id, table_name=None):
|
|
169
214
|
chat_bot_id = self.chat_task.bot_id
|
|
170
|
-
destination =
|
|
215
|
+
destination = self._generate_chat_id_for_db(chat_id, table_name)
|
|
171
216
|
|
|
172
217
|
query = db.ChatBotsHistory.query\
|
|
173
218
|
.filter(
|
|
@@ -215,6 +215,8 @@ class IntegrationController:
|
|
|
215
215
|
def modify(self, name, data):
|
|
216
216
|
self.handlers_cache.delete(name)
|
|
217
217
|
integration_record = self._get_integration_record(name)
|
|
218
|
+
if isinstance(integration_record.data, dict) and integration_record.data.get('is_demo') is True:
|
|
219
|
+
raise ValueError("It is forbidden to change properties of the demo object")
|
|
218
220
|
old_data = deepcopy(integration_record.data)
|
|
219
221
|
for k in old_data:
|
|
220
222
|
if k not in data:
|
|
@@ -234,9 +236,11 @@ class IntegrationController:
|
|
|
234
236
|
handler = self.handler_modules[name]
|
|
235
237
|
|
|
236
238
|
if getattr(handler, 'permanent', False) is True:
|
|
237
|
-
raise Exception('Unable to drop
|
|
239
|
+
raise Exception('Unable to drop permanent integration')
|
|
238
240
|
|
|
239
241
|
integration_record = self._get_integration_record(name)
|
|
242
|
+
if isinstance(integration_record.data, dict) and integration_record.data.get('is_demo') is True:
|
|
243
|
+
raise Exception('Unable to drop demo object')
|
|
240
244
|
|
|
241
245
|
# if this is ml engine
|
|
242
246
|
engine_models = get_model_records(ml_handler_name=name, deleted_at=None)
|
|
@@ -7,6 +7,7 @@ import sqlalchemy as sa
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
|
|
9
9
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
10
|
+
from mindsdb_sql_parser.ast import Select, Star, Constant, Identifier
|
|
10
11
|
from mindsdb_sql_parser import parse_sql
|
|
11
12
|
|
|
12
13
|
from mindsdb.interfaces.storage import db
|
|
@@ -16,6 +17,9 @@ from mindsdb.interfaces.database.views import ViewController
|
|
|
16
17
|
from mindsdb.utilities.context import context as ctx
|
|
17
18
|
from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
|
|
18
19
|
import mindsdb.utilities.profiler as profiler
|
|
20
|
+
from mindsdb.api.executor.sql_query import SQLQuery
|
|
21
|
+
from mindsdb.api.executor.utilities.sql import query_df
|
|
22
|
+
from mindsdb.interfaces.query_context.context_controller import query_context_controller
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
class Project:
|
|
@@ -24,19 +28,14 @@ class Project:
|
|
|
24
28
|
p = Project()
|
|
25
29
|
p.record = db_record
|
|
26
30
|
p.name = db_record.name
|
|
27
|
-
p.company_id =
|
|
31
|
+
p.company_id = ctx.company_id
|
|
28
32
|
p.id = db_record.id
|
|
29
33
|
return p
|
|
30
34
|
|
|
31
35
|
def create(self, name: str):
|
|
32
36
|
name = name.lower()
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
& (db.Project.company_id == ctx.company_id)
|
|
36
|
-
& (db.Project.deleted_at == sa.null())
|
|
37
|
-
).first()
|
|
38
|
-
if existing_record is not None:
|
|
39
|
-
raise EntityExistsError('Project already exists', name)
|
|
37
|
+
|
|
38
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
40
39
|
|
|
41
40
|
existing_record = db.Integration.query.filter(
|
|
42
41
|
sa.func.lower(db.Integration.name) == name,
|
|
@@ -45,23 +44,28 @@ class Project:
|
|
|
45
44
|
if existing_record is not None:
|
|
46
45
|
raise EntityExistsError('Database exists with this name ', name)
|
|
47
46
|
|
|
47
|
+
existing_record = db.Project.query.filter(
|
|
48
|
+
(sa.func.lower(db.Project.name) == name)
|
|
49
|
+
& (db.Project.company_id == company_id)
|
|
50
|
+
& (db.Project.deleted_at == sa.null())
|
|
51
|
+
).first()
|
|
52
|
+
if existing_record is not None:
|
|
53
|
+
raise EntityExistsError('Project already exists', name)
|
|
54
|
+
|
|
48
55
|
record = db.Project(
|
|
49
56
|
name=name,
|
|
50
|
-
company_id=
|
|
57
|
+
company_id=company_id
|
|
51
58
|
)
|
|
52
59
|
|
|
53
60
|
self.record = record
|
|
54
61
|
self.name = name
|
|
55
|
-
self.company_id =
|
|
62
|
+
self.company_id = company_id
|
|
56
63
|
|
|
57
64
|
db.session.add(record)
|
|
58
65
|
db.session.commit()
|
|
59
66
|
|
|
60
67
|
self.id = record.id
|
|
61
68
|
|
|
62
|
-
def save(self):
|
|
63
|
-
db.session.commit()
|
|
64
|
-
|
|
65
69
|
def delete(self):
|
|
66
70
|
tables = self.get_tables()
|
|
67
71
|
tables = [key for key, val in tables.items() if val['type'] != 'table']
|
|
@@ -111,7 +115,7 @@ class Project:
|
|
|
111
115
|
project_name=self.name
|
|
112
116
|
)
|
|
113
117
|
|
|
114
|
-
def
|
|
118
|
+
def get_view_meta(self, query: ASTNode) -> ASTNode:
|
|
115
119
|
view_name = query.from_table.parts[-1]
|
|
116
120
|
view_meta = ViewController().get(
|
|
117
121
|
name=view_name,
|
|
@@ -120,6 +124,30 @@ class Project:
|
|
|
120
124
|
view_meta['query_ast'] = parse_sql(view_meta['query'])
|
|
121
125
|
return view_meta
|
|
122
126
|
|
|
127
|
+
def query_view(self, query, session):
    """
    Run the stored query of the view referenced by `query`, then apply `query` to its result.

    Args:
        query: AST node selecting from the view; passed to get_view_meta and to query_df.
        session: session object forwarded to SQLQuery and query_df.

    Returns:
        Whatever query_df returns for the de-duplicated view result and `query`.

    Raises:
        Exception: if the fetched result reports success is False.
    """
    meta = self.get_view_meta(query)

    # The 'view' context is held only while the view's own query runs and is always released.
    query_context_controller.set_context('view', meta['id'])
    try:
        view_ast = meta['query_ast']
        result = SQLQuery(view_ast, session=session).fetch(view='dataframe')
    finally:
        query_context_controller.release_context('view', meta['id'])

    if result['success'] is False:
        raise Exception(f"Cant execute view query: {view_ast}")

    # remove duplicated columns (keep the first occurrence of each name)
    frame = result['result']
    first_occurrence = ~frame.columns.duplicated()
    frame = frame.loc[:, first_occurrence]

    return query_df(frame, query, session=session)
|
|
150
|
+
|
|
123
151
|
@staticmethod
|
|
124
152
|
def _get_model_data(predictor_record, integraion_record, with_secrets: bool = True):
|
|
125
153
|
from mindsdb.interfaces.database.integrations import integration_controller
|
|
@@ -341,6 +369,15 @@ class Project:
|
|
|
341
369
|
columns = predictor_record.to_predict
|
|
342
370
|
if not isinstance(columns, list):
|
|
343
371
|
columns = [columns]
|
|
372
|
+
return columns
|
|
373
|
+
if self.get_view(table_name):
|
|
374
|
+
query = Select(targets=[Star()], from_table=Identifier(table_name), limit=Constant(1))
|
|
375
|
+
|
|
376
|
+
from mindsdb.api.executor.controllers.session_controller import SessionController
|
|
377
|
+
session = SessionController()
|
|
378
|
+
session.database = self.name
|
|
379
|
+
df = self.query_view(query, session)
|
|
380
|
+
return df.columns
|
|
344
381
|
else:
|
|
345
382
|
# is it agent?
|
|
346
383
|
agent = db.Agents.query.filter_by(
|
|
@@ -360,8 +397,9 @@ class ProjectController:
|
|
|
360
397
|
pass
|
|
361
398
|
|
|
362
399
|
def get_list(self) -> List[Project]:
|
|
400
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
363
401
|
records = db.Project.query.filter(
|
|
364
|
-
(db.Project.company_id ==
|
|
402
|
+
(db.Project.company_id == company_id)
|
|
365
403
|
& (db.Project.deleted_at == sa.null())
|
|
366
404
|
).order_by(db.Project.name)
|
|
367
405
|
|
|
@@ -371,7 +409,8 @@ class ProjectController:
|
|
|
371
409
|
if id is not None and name is not None:
|
|
372
410
|
raise ValueError("Both 'id' and 'name' is None")
|
|
373
411
|
|
|
374
|
-
|
|
412
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
413
|
+
q = db.Project.query.filter_by(company_id=company_id)
|
|
375
414
|
|
|
376
415
|
if id is not None:
|
|
377
416
|
q = q.filter_by(id=id)
|
|
@@ -3,6 +3,7 @@ from mindsdb.interfaces.storage import db
|
|
|
3
3
|
from mindsdb.interfaces.query_context.context_controller import query_context_controller
|
|
4
4
|
from mindsdb.utilities.context import context as ctx
|
|
5
5
|
from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
|
|
6
|
+
from mindsdb.interfaces.model.functions import get_project_record, get_project_records
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class ViewController:
|
|
@@ -39,11 +40,8 @@ class ViewController:
|
|
|
39
40
|
|
|
40
41
|
def update(self, name, query, project_name):
|
|
41
42
|
name = name.lower()
|
|
42
|
-
project_record =
|
|
43
|
-
|
|
44
|
-
company_id=ctx.company_id,
|
|
45
|
-
deleted_at=None
|
|
46
|
-
).first()
|
|
43
|
+
project_record = get_project_record(project_name)
|
|
44
|
+
|
|
47
45
|
rec = db.session.query(db.View).filter(
|
|
48
46
|
func.lower(db.View.name) == name,
|
|
49
47
|
db.View.company_id == ctx.company_id,
|
|
@@ -56,11 +54,8 @@ class ViewController:
|
|
|
56
54
|
|
|
57
55
|
def delete(self, name, project_name):
|
|
58
56
|
name = name.lower()
|
|
59
|
-
project_record =
|
|
60
|
-
|
|
61
|
-
company_id=ctx.company_id,
|
|
62
|
-
deleted_at=None
|
|
63
|
-
).first()
|
|
57
|
+
project_record = get_project_record(project_name)
|
|
58
|
+
|
|
64
59
|
rec = db.session.query(db.View).filter(
|
|
65
60
|
func.lower(db.View.name) == name,
|
|
66
61
|
db.View.company_id == ctx.company_id,
|
|
@@ -74,17 +69,12 @@ class ViewController:
|
|
|
74
69
|
query_context_controller.drop_query_context('view', rec.id)
|
|
75
70
|
|
|
76
71
|
def list(self, project_name):
|
|
77
|
-
query = db.session.query(db.Project).filter_by(
|
|
78
|
-
company_id=ctx.company_id,
|
|
79
|
-
deleted_at=None
|
|
80
|
-
)
|
|
81
|
-
if project_name is not None:
|
|
82
|
-
query = query.filter_by(name=project_name)
|
|
83
72
|
|
|
84
|
-
project_names = {
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
73
|
+
project_names = {}
|
|
74
|
+
for project in get_project_records():
|
|
75
|
+
if project_name is not None and project.name != project_name:
|
|
76
|
+
continue
|
|
77
|
+
project_names[project.id] = project.name
|
|
88
78
|
|
|
89
79
|
query = db.session.query(db.View).filter(
|
|
90
80
|
db.View.company_id == ctx.company_id,
|
|
@@ -112,11 +102,8 @@ class ViewController:
|
|
|
112
102
|
}
|
|
113
103
|
|
|
114
104
|
def get(self, id=None, name=None, project_name=None):
|
|
115
|
-
project_record =
|
|
116
|
-
|
|
117
|
-
company_id=ctx.company_id,
|
|
118
|
-
deleted_at=None
|
|
119
|
-
).first()
|
|
105
|
+
project_record = get_project_record(project_name)
|
|
106
|
+
|
|
120
107
|
if id is not None:
|
|
121
108
|
records = db.session.query(db.View).filter_by(
|
|
122
109
|
id=id,
|
|
@@ -52,6 +52,7 @@ class KnowledgeBaseTable:
|
|
|
52
52
|
self.session = session
|
|
53
53
|
self.document_preprocessor = None
|
|
54
54
|
self.document_loader = None
|
|
55
|
+
self.model_params = None
|
|
55
56
|
|
|
56
57
|
def configure_preprocessing(self, config: Optional[dict] = None):
|
|
57
58
|
"""Configure preprocessing for the knowledge base table"""
|
|
@@ -488,6 +489,7 @@ class KnowledgeBaseTable:
|
|
|
488
489
|
df_out = project_datanode.predict(
|
|
489
490
|
model_name=model_rec.name,
|
|
490
491
|
df=df,
|
|
492
|
+
params=self.model_params
|
|
491
493
|
)
|
|
492
494
|
|
|
493
495
|
target = model_rec.to_predict[0]
|
|
@@ -642,11 +644,13 @@ class KnowledgeBaseController:
|
|
|
642
644
|
storage: Identifier,
|
|
643
645
|
params: dict,
|
|
644
646
|
preprocessing_config: Optional[dict] = None,
|
|
645
|
-
if_not_exists: bool = False
|
|
647
|
+
if_not_exists: bool = False
|
|
646
648
|
) -> db.KnowledgeBase:
|
|
647
649
|
"""
|
|
648
650
|
Add a new knowledge base to the database
|
|
649
651
|
:param preprocessing_config: Optional preprocessing configuration to validate and store
|
|
652
|
+
:param is_sparse: Whether to use sparse vectors for embeddings
|
|
653
|
+
:param vector_size: Optional size specification for vectors, required when is_sparse=True
|
|
650
654
|
"""
|
|
651
655
|
# Validate preprocessing config first if provided
|
|
652
656
|
if preprocessing_config is not None:
|
|
@@ -654,6 +658,12 @@ class KnowledgeBaseController:
|
|
|
654
658
|
params = params or {}
|
|
655
659
|
params['preprocessing'] = preprocessing_config
|
|
656
660
|
|
|
661
|
+
# Check if vector_size is provided when using sparse vectors
|
|
662
|
+
is_sparse = params.get('is_sparse')
|
|
663
|
+
vector_size = params.get('vector_size')
|
|
664
|
+
if is_sparse and vector_size is None:
|
|
665
|
+
raise ValueError("vector_size is required when is_sparse=True")
|
|
666
|
+
|
|
657
667
|
# get project id
|
|
658
668
|
project = self.session.database_controller.get_project(project_name)
|
|
659
669
|
project_id = project.id
|
|
@@ -693,7 +703,16 @@ class KnowledgeBaseController:
|
|
|
693
703
|
cloud_pg_vector = os.environ.get('KB_PGVECTOR_URL')
|
|
694
704
|
if cloud_pg_vector:
|
|
695
705
|
vector_table_name = name
|
|
696
|
-
|
|
706
|
+
# Add sparse vector support for pgvector
|
|
707
|
+
vector_db_params = {}
|
|
708
|
+
# Check both explicit parameter and model configuration
|
|
709
|
+
is_sparse = is_sparse or model_record.learn_args.get('using', {}).get('sparse')
|
|
710
|
+
if is_sparse:
|
|
711
|
+
vector_db_params['is_sparse'] = True
|
|
712
|
+
if vector_size is not None:
|
|
713
|
+
vector_db_params['vector_size'] = vector_size
|
|
714
|
+
vector_db_name = self._create_persistent_pgvector(vector_db_params)
|
|
715
|
+
|
|
697
716
|
else:
|
|
698
717
|
# create chroma db with same name
|
|
699
718
|
vector_table_name = "default_collection"
|
|
@@ -705,17 +724,20 @@ class KnowledgeBaseController:
|
|
|
705
724
|
else:
|
|
706
725
|
vector_db_name, vector_table_name = storage.parts
|
|
707
726
|
|
|
727
|
+
# create table in vectordb before creating KB
|
|
728
|
+
self.session.datahub.get(vector_db_name).integration_handler.create_table(
|
|
729
|
+
vector_table_name
|
|
730
|
+
)
|
|
708
731
|
vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
|
|
709
732
|
|
|
710
|
-
#
|
|
711
|
-
if
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
)
|
|
733
|
+
# Store sparse vector settings in params if specified
|
|
734
|
+
if is_sparse:
|
|
735
|
+
params = params or {}
|
|
736
|
+
params['vector_config'] = {
|
|
737
|
+
'is_sparse': is_sparse
|
|
738
|
+
}
|
|
739
|
+
if vector_size is not None:
|
|
740
|
+
params['vector_config']['vector_size'] = vector_size
|
|
719
741
|
|
|
720
742
|
kb = db.KnowledgeBase(
|
|
721
743
|
name=name,
|
|
@@ -729,16 +751,15 @@ class KnowledgeBaseController:
|
|
|
729
751
|
db.session.commit()
|
|
730
752
|
return kb
|
|
731
753
|
|
|
732
|
-
def _create_persistent_pgvector(self):
|
|
754
|
+
def _create_persistent_pgvector(self, params=None):
|
|
733
755
|
"""Create default vector database for knowledge base, if not specified"""
|
|
734
|
-
|
|
735
756
|
vector_store_name = "kb_pgvector_store"
|
|
736
757
|
|
|
737
758
|
# check if exists
|
|
738
759
|
if self.session.integration_controller.get(vector_store_name):
|
|
739
760
|
return vector_store_name
|
|
740
761
|
|
|
741
|
-
self.session.integration_controller.add(vector_store_name, 'pgvector', {})
|
|
762
|
+
self.session.integration_controller.add(vector_store_name, 'pgvector', params or {})
|
|
742
763
|
return vector_store_name
|
|
743
764
|
|
|
744
765
|
def _create_persistent_chroma(self, kb_name, engine="chromadb"):
|
|
@@ -840,16 +861,19 @@ class KnowledgeBaseController:
|
|
|
840
861
|
)
|
|
841
862
|
return kb
|
|
842
863
|
|
|
843
|
-
def get_table(self, name: str, project_id: int) -> KnowledgeBaseTable:
|
|
864
|
+
def get_table(self, name: str, project_id: int, params: dict = None) -> KnowledgeBaseTable:
|
|
844
865
|
"""
|
|
845
866
|
Returns kb table object with properly configured preprocessing
|
|
846
867
|
:param name: table name
|
|
847
868
|
:param project_id: project id
|
|
869
|
+
:param params: runtime parameters for KB. Keys: 'model' - parameters for embedding model
|
|
848
870
|
:return: kb table object
|
|
849
871
|
"""
|
|
850
872
|
kb = self.get(name, project_id)
|
|
851
873
|
if kb is not None:
|
|
852
874
|
table = KnowledgeBaseTable(kb, self.session)
|
|
875
|
+
if params:
|
|
876
|
+
table.model_params = params.get('model')
|
|
853
877
|
|
|
854
878
|
# Always configure preprocessing - either from params or default
|
|
855
879
|
if kb.params and 'preprocessing' in kb.params:
|
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import List, Iterator
|
|
3
3
|
from langchain_core.documents import Document as LangchainDocument
|
|
4
|
-
from langchain_text_splitters import MarkdownHeaderTextSplitter
|
|
4
|
+
from langchain_text_splitters import MarkdownHeaderTextSplitter
|
|
5
5
|
import pandas as pd
|
|
6
6
|
|
|
7
7
|
from mindsdb.interfaces.file.file_controller import FileController
|
|
8
8
|
from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
|
|
9
9
|
from mindsdb.integrations.utilities.rag.splitters.file_splitter import (
|
|
10
10
|
FileSplitter,
|
|
11
|
-
DEFAULT_CHUNK_SIZE,
|
|
12
|
-
DEFAULT_CHUNK_OVERLAP
|
|
13
11
|
)
|
|
14
12
|
from mindsdb.integrations.handlers.web_handler.urlcrawl_helpers import get_all_websites
|
|
15
13
|
from mindsdb.interfaces.knowledge_base.preprocessing.models import Document
|
|
@@ -45,12 +43,6 @@ class DocumentLoader:
|
|
|
45
43
|
self.file_loader_class = file_loader_class
|
|
46
44
|
self.mysql_proxy = mysql_proxy
|
|
47
45
|
|
|
48
|
-
# Initialize text splitter for query results with default settings
|
|
49
|
-
self.query_splitter = RecursiveCharacterTextSplitter(
|
|
50
|
-
chunk_size=DEFAULT_CHUNK_SIZE,
|
|
51
|
-
chunk_overlap=DEFAULT_CHUNK_OVERLAP
|
|
52
|
-
)
|
|
53
|
-
|
|
54
46
|
def load_files(self, file_names: List[str]) -> Iterator[Document]:
|
|
55
47
|
"""Load and split documents from files"""
|
|
56
48
|
for file_name in file_names:
|
|
@@ -143,8 +135,9 @@ class DocumentLoader:
|
|
|
143
135
|
|
|
144
136
|
# Process each row into a Document
|
|
145
137
|
for _, row in df.iterrows():
|
|
146
|
-
# Extract content
|
|
138
|
+
# Extract id, content and metadata
|
|
147
139
|
content = str(row.get('content', ''))
|
|
140
|
+
id = row.get('id', None)
|
|
148
141
|
|
|
149
142
|
# Convert remaining columns to metadata
|
|
150
143
|
metadata = {
|
|
@@ -156,21 +149,9 @@ class DocumentLoader:
|
|
|
156
149
|
|
|
157
150
|
# Split content using recursive splitter
|
|
158
151
|
if content:
|
|
159
|
-
|
|
160
|
-
|
|
152
|
+
|
|
153
|
+
yield Document(
|
|
154
|
+
id=id,
|
|
155
|
+
content=content,
|
|
161
156
|
metadata=metadata
|
|
162
157
|
)
|
|
163
|
-
# Use FileSplitter with default recursive splitter
|
|
164
|
-
split_docs = self.file_splitter.split_documents(
|
|
165
|
-
[doc],
|
|
166
|
-
default_failover=True
|
|
167
|
-
)
|
|
168
|
-
|
|
169
|
-
for split_doc in split_docs:
|
|
170
|
-
metadata = doc.metadata.copy()
|
|
171
|
-
metadata.update(split_doc.metadata or {})
|
|
172
|
-
|
|
173
|
-
yield Document(
|
|
174
|
-
content=split_doc.page_content,
|
|
175
|
-
metadata=metadata
|
|
176
|
-
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Optional, List
|
|
2
2
|
|
|
3
3
|
from sqlalchemy import null, func
|
|
4
4
|
|
|
@@ -41,9 +41,7 @@ def get_integration_record(name: str) -> db.Integration:
|
|
|
41
41
|
|
|
42
42
|
@profiler.profile()
|
|
43
43
|
def get_project_record(name: str) -> db.Project:
|
|
44
|
-
company_id = ctx.company_id
|
|
45
|
-
if company_id is None:
|
|
46
|
-
company_id = null()
|
|
44
|
+
company_id = ctx.company_id if ctx.company_id is not None else 0
|
|
47
45
|
|
|
48
46
|
project_record = (
|
|
49
47
|
db.session.query(db.Project)
|
|
@@ -56,6 +54,19 @@ def get_project_record(name: str) -> db.Project:
|
|
|
56
54
|
return project_record
|
|
57
55
|
|
|
58
56
|
|
|
57
|
+
@profiler.profile()
def get_project_records() -> List[db.Project]:
    """
    Return every non-deleted project record of the current company.

    The company id is taken from the context; when it is None, 0 is used instead.

    Returns:
        List[db.Project]: all matching project records.
    """
    company_id = ctx.company_id
    if company_id is None:
        company_id = 0

    owned_by_company = db.Project.company_id == company_id
    not_deleted = db.Project.deleted_at == null()

    return (
        db.session.query(db.Project)
        .filter(owned_by_company & not_deleted)
        .all()
    )
|
|
68
|
+
|
|
69
|
+
|
|
59
70
|
@profiler.profile()
|
|
60
71
|
def get_predictor_integration(record: db.Predictor) -> db.Integration:
|
|
61
72
|
integration_record = (
|
|
@@ -7,14 +7,15 @@ from multiprocessing.pool import ThreadPool
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from dateutil.parser import parse as parse_datetime
|
|
9
9
|
|
|
10
|
-
from sqlalchemy import func
|
|
10
|
+
from sqlalchemy import func
|
|
11
11
|
import numpy as np
|
|
12
12
|
|
|
13
13
|
import mindsdb.interfaces.storage.db as db
|
|
14
14
|
from mindsdb.utilities.config import Config
|
|
15
15
|
from mindsdb.interfaces.model.functions import (
|
|
16
16
|
get_model_record,
|
|
17
|
-
get_model_records
|
|
17
|
+
get_model_records,
|
|
18
|
+
get_project_record
|
|
18
19
|
)
|
|
19
20
|
from mindsdb.interfaces.storage.json import get_json_storage
|
|
20
21
|
from mindsdb.interfaces.storage.model_fs import ModelStorage
|
|
@@ -151,11 +152,7 @@ class ModelController():
|
|
|
151
152
|
def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
|
|
152
153
|
from mindsdb.interfaces.database.database import DatabaseController
|
|
153
154
|
|
|
154
|
-
project_record =
|
|
155
|
-
(func.lower(db.Project.name) == func.lower(project_name))
|
|
156
|
-
& (db.Project.company_id == ctx.company_id)
|
|
157
|
-
& (db.Project.deleted_at == null())
|
|
158
|
-
).first()
|
|
155
|
+
project_record = get_project_record(func.lower(project_name))
|
|
159
156
|
if project_record is None:
|
|
160
157
|
raise Exception(f"Project '{project_name}' does not exists")
|
|
161
158
|
|