MindsDB 25.9.1.2__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +39 -20
- mindsdb/api/a2a/agent.py +7 -9
- mindsdb/api/a2a/common/server/server.py +3 -3
- mindsdb/api/a2a/common/server/task_manager.py +4 -4
- mindsdb/api/a2a/task_manager.py +15 -17
- mindsdb/api/common/middleware.py +9 -11
- mindsdb/api/executor/command_executor.py +2 -4
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +32 -16
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +8 -10
- mindsdb/api/http/namespaces/agents.py +10 -12
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +15 -4
- mindsdb/api/http/namespaces/handlers.py +7 -2
- mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +14 -8
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +11 -5
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +20 -2
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +18 -3
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -1
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
- mindsdb/interfaces/database/integrations.py +19 -2
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -5
- mindsdb/interfaces/jobs/scheduler.py +3 -8
- mindsdb/interfaces/knowledge_base/controller.py +54 -25
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +170 -166
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/storage/model_fs.py +54 -92
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -50
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +49 -0
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +8 -7
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +266 -261
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +119 -119
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/model/model_controller.py

@@ -11,11 +11,7 @@ import numpy as np
 
 import mindsdb.interfaces.storage.db as db
 from mindsdb.utilities.config import Config
-from mindsdb.interfaces.model.functions import
-    get_model_record,
-    get_model_records,
-    get_project_record
-)
+from mindsdb.interfaces.model.functions import get_model_record, get_model_records, get_project_record
 from mindsdb.interfaces.storage.json import get_json_storage
 from mindsdb.interfaces.storage.model_fs import ModelStorage
 from mindsdb.utilities.config import config
@@ -28,7 +24,7 @@ from mindsdb.utilities import log
 logger = log.getLogger(__name__)
 
 
-default_project = config.get(
+default_project = config.get("default_project")
 
 
 def delete_model_storage(model_id, ctx_dump):
@@ -36,85 +32,91 @@ def delete_model_storage(model_id, ctx_dump):
         ctx.load(ctx_dump)
         modelStorage = ModelStorage(model_id)
         modelStorage.delete()
-    except Exception
-        logger.
+    except Exception:
+        logger.exception(f"Something went wrong during deleting storage of model {model_id}:")
 
 
-class ModelController
+class ModelController:
     config: Config
 
     def __init__(self) -> None:
         self.config = Config()
 
-    def get_model_data(self, name: str = None, predictor_record=None, ml_handler_name=
+    def get_model_data(self, name: str = None, predictor_record=None, ml_handler_name="lightwood") -> dict:
         if predictor_record is None:
             predictor_record = get_model_record(except_absent=True, name=name, ml_handler_name=ml_handler_name)
 
         data = deepcopy(predictor_record.data)
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-        data[
-
-        json_storage = get_json_storage(
-
-
-        data
-
-
-        if len(data['accuracies']) > 0:
-            data['accuracy'] = float(np.mean(list(data['accuracies'].values())))
+        data["dtype_dict"] = predictor_record.dtype_dict
+        data["created_at"] = str(parse_datetime(str(predictor_record.created_at).split(".")[0]))
+        data["updated_at"] = str(parse_datetime(str(predictor_record.updated_at).split(".")[0]))
+        data["training_start_at"] = predictor_record.training_start_at
+        data["training_stop_at"] = predictor_record.training_stop_at
+        data["predict"] = predictor_record.to_predict[0]
+        data["update"] = predictor_record.update_status
+        data["mindsdb_version"] = predictor_record.mindsdb_version
+        data["name"] = predictor_record.name
+        data["code"] = predictor_record.code
+        data["problem_definition"] = predictor_record.learn_args
+        data["fetch_data_query"] = predictor_record.fetch_data_query
+        data["active"] = predictor_record.active
+        data["status"] = predictor_record.status
+        data["id"] = predictor_record.id
+        data["version"] = predictor_record.version
+
+        json_storage = get_json_storage(resource_id=predictor_record.id)
+        data["json_ai"] = json_storage.get("json_ai")
+
+        if data.get("accuracies", None) is not None:
+            if len(data["accuracies"]) > 0:
+                data["accuracy"] = float(np.mean(list(data["accuracies"].values())))
         return data
 
-    def get_reduced_model_data(self, name: str = None, predictor_record=None, ml_handler_name=
-        full_model_data = self.get_model_data(
+    def get_reduced_model_data(self, name: str = None, predictor_record=None, ml_handler_name="lightwood") -> dict:
+        full_model_data = self.get_model_data(
+            name=name, predictor_record=predictor_record, ml_handler_name=ml_handler_name
+        )
         reduced_model_data = {}
-        for k in [
-
-
+        for k in [
+            "id",
+            "name",
+            "version",
+            "is_active",
+            "predict",
+            "status",
+            "problem_definition",
+            "current_phase",
+            "accuracy",
+            "data_source",
+            "update",
+            "active",
+            "mindsdb_version",
+            "error",
+            "created_at",
+            "fetch_data_query",
+        ]:
             reduced_model_data[k] = full_model_data.get(k, None)
 
-        reduced_model_data[
-        if full_model_data.get(
-            if full_model_data.get(
-                reduced_model_data[
-
-                    - full_model_data.get('training_start_at')
-                )
-            elif full_model_data.get('status') == 'training':
-                reduced_model_data['training_time'] = (
-                    dt.datetime.now()
-                    - full_model_data.get('training_start_at')
+        reduced_model_data["training_time"] = None
+        if full_model_data.get("training_start_at") is not None:
+            if full_model_data.get("training_stop_at") is not None:
+                reduced_model_data["training_time"] = full_model_data.get("training_stop_at") - full_model_data.get(
+                    "training_start_at"
                 )
-
-            reduced_model_data[
-
-
+            elif full_model_data.get("status") == "training":
+                reduced_model_data["training_time"] = dt.datetime.now() - full_model_data.get("training_start_at")
+        if reduced_model_data["training_time"] is not None:
+            reduced_model_data["training_time"] = (
+                reduced_model_data["training_time"]
+                - dt.timedelta(microseconds=reduced_model_data["training_time"].microseconds)
             ).total_seconds()
 
         return reduced_model_data
 
     def describe_model(self, session, project_name, model_name, attribute, version=None):
-        args = {
-            'name': model_name,
-            'version': version,
-            'project_name': project_name,
-            'except_absent': True
-        }
+        args = {"name": model_name, "version": version, "project_name": project_name, "except_absent": True}
         if version is not None:
-            args[
+            args["active"] = None
 
         model_record = get_model_record(**args)
 
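Note on the reworked training_time logic above: get_reduced_model_data now computes training_time in one pass, preferring the recorded stop time, falling back to the current time while a model is still training, and stripping microseconds before converting to whole seconds. A minimal standalone sketch of that logic follows; the helper name and bare arguments are illustrative, not part of the package.

import datetime as dt

def training_time_seconds(training_start_at, training_stop_at, status):
    # Prefer the recorded stop time; fall back to "now" while the model is still training.
    training_time = None
    if training_start_at is not None:
        if training_stop_at is not None:
            training_time = training_stop_at - training_start_at
        elif status == "training":
            training_time = dt.datetime.now() - training_start_at
    if training_time is None:
        return None
    # Drop sub-second noise before reporting whole seconds, as in the diff above.
    training_time = training_time - dt.timedelta(microseconds=training_time.microseconds)
    return training_time.total_seconds()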
@@ -132,20 +134,24 @@ class ModelController():
             name=name,
             ml_handler_name=ml_handler_name,
             except_absent=True,
-            project_name=project_name
+            project_name=project_name,
+        )
         data = self.get_reduced_model_data(predictor_record=model_record)
         integration_record = db.Integration.query.get(model_record.integration_id)
         if integration_record is not None:
-            data[
-            data[
+            data["engine"] = integration_record.engine
+            data["engine_name"] = integration_record.name
         return data
 
-    def get_models(self, with_versions=False, ml_handler_name=None, integration_id=None,
-                   project_name=None):
+    def get_models(self, with_versions=False, ml_handler_name=None, integration_id=None, project_name=None):
         models = []
         show_active = True if with_versions is False else None
-        for model_record in get_model_records(
-
+        for model_record in get_model_records(
+            active=show_active,
+            ml_handler_name=ml_handler_name,
+            integration_id=integration_id,
+            project_name=project_name,
+        ):
             model_data = self.get_reduced_model_data(predictor_record=model_record)
             models.append(model_data)
         return models
@@ -175,18 +181,20 @@ class ModelController():
             version=version,
         )
         if len(predictors_records) == 0:
-            raise EntityNotExistsError(
+            raise EntityNotExistsError("Model does not exist", model_name)
 
-        is_cloud = self.config.get(
+        is_cloud = self.config.get("cloud", False)
         if is_cloud:
             for predictor_record in predictors_records:
                 model_data = self.get_model_data(predictor_record=predictor_record)
                 if (
-                    model_data.get(
-                    and isinstance(model_data.get(
-                    and (dt.datetime.now() - parse_datetime(model_data.get(
+                    model_data.get("status") in ["generating", "training"]
+                    and isinstance(model_data.get("created_at"), str) is True
+                    and (dt.datetime.now() - parse_datetime(model_data.get("created_at"))) < dt.timedelta(hours=1)
                 ):
-                    raise Exception(
+                    raise Exception(
+                        "You are unable to delete models currently in progress, please wait before trying again"
+                    )
 
         for predictor_record in predictors_records:
             if is_cloud:
@@ -225,22 +233,15 @@ class ModelController():
 
         databases_meta = database_controller.get_dict()
         if integration_name not in databases_meta:
-            raise EntityNotExistsError(
+            raise EntityNotExistsError("Database does not exist", integration_name)
         data_integration_meta = databases_meta[integration_name]
         # TODO improve here. Suppose that it is view
-        if data_integration_meta[
-            data_integration_ref = {
-
-            }
-        elif data_integration_meta['type'] == 'system':
-            data_integration_ref = {
-                'type': 'system'
-            }
+        if data_integration_meta["type"] == "project":
+            data_integration_ref = {"type": "project"}
+        elif data_integration_meta["type"] == "system":
+            data_integration_ref = {"type": "system"}
         else:
-            data_integration_ref = {
-                'type': 'integration',
-                'id': data_integration_meta['id']
-            }
+            data_integration_ref = {"type": "integration", "id": data_integration_meta["id"]}
         return data_integration_ref, fetch_data_query
 
     def prepare_create_statement(self, statement, database_controller):
@@ -251,36 +252,34 @@ class ModelController():
         sql_task = None
         if statement.task is not None:
             sql_task = statement.task.to_string()
-        problem_definition = {
-            '__mdb_sql_task': sql_task
-        }
+        problem_definition = {"__mdb_sql_task": sql_task}
         if statement.targets is not None:
-            problem_definition[
+            problem_definition["target"] = statement.targets[0].parts[-1]
 
         data_integration_ref, fetch_data_query = self._get_data_integration_ref(statement, database_controller)
 
         label = None
         if statement.using is not None:
-            label = statement.using.pop(
+            label = statement.using.pop("tag", None)
 
-        problem_definition[
+        problem_definition["using"] = statement.using
 
         if statement.order_by is not None:
-            problem_definition[
-
-
+            problem_definition["timeseries_settings"] = {
+                "is_timeseries": True,
+                "order_by": getattr(statement, "order_by")[0].field.parts[-1],
             }
-            for attr in [
+            for attr in ["horizon", "window"]:
                 if getattr(statement, attr) is not None:
-                    problem_definition[
+                    problem_definition["timeseries_settings"][attr] = getattr(statement, attr)
 
             if statement.group_by is not None:
-                problem_definition[
+                problem_definition["timeseries_settings"]["group_by"] = [col.parts[-1] for col in statement.group_by]
 
         join_learn_process = False
-        if
-            join_learn_process = problem_definition[
-            del problem_definition[
+        if "join_learn_process" in problem_definition.get("using", {}):
+            join_learn_process = problem_definition["using"]["join_learn_process"]
+            del problem_definition["using"]["join_learn_process"]
 
         return dict(
             model_name=model_name,
@@ -289,20 +288,20 @@ class ModelController():
             fetch_data_query=fetch_data_query,
             problem_definition=problem_definition,
             join_learn_process=join_learn_process,
-            label=label
+            label=label,
         )
 
     def create_model(self, statement, ml_handler):
         params = self.prepare_create_statement(statement, ml_handler.database_controller)
 
-        existing_projects_meta = ml_handler.database_controller.get_dict(filter_type=
-        if params[
-            raise EntityNotExistsError(
+        existing_projects_meta = ml_handler.database_controller.get_dict(filter_type="project")
+        if params["project_name"] not in existing_projects_meta:
+            raise EntityNotExistsError("Project does not exist", params["project_name"])
 
-        project = ml_handler.database_controller.get_project(name=params[
+        project = ml_handler.database_controller.get_project(name=params["project_name"])
         project_tables = project.get_tables()
-        if params[
-            raise EntityExistsError(
+        if params["model_name"] in project_tables:
+            raise EntityExistsError("Model already exists", f"{params['project_name']}.{params['model_name']}")
         predictor_record = ml_handler.learn(**params)
 
         return ModelController.get_model_info(predictor_record)
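For orientation, prepare_create_statement (changed above) assembles the problem_definition dictionary that ml_handler.learn() receives. The sketch below is illustrative only, with made-up values, to show where the ORDER BY, HORIZON, WINDOW and GROUP BY clauses of a time-series CREATE MODEL statement end up.

# Illustrative shape only; the concrete values are invented, not taken from the package.
problem_definition = {
    "__mdb_sql_task": None,           # serialized task string, when the statement has one
    "target": "sales",                # from the statement target
    "using": {},                      # USING options left after "tag" and "join_learn_process" are popped
    "timeseries_settings": {          # present only when ORDER BY is given
        "is_timeseries": True,
        "order_by": "order_date",     # ORDER BY column
        "horizon": 7,                 # HORIZON, only if present
        "window": 30,                 # WINDOW, only if present
        "group_by": ["store_id"],     # GROUP BY columns, only if present
    },
}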
@@ -311,33 +310,31 @@ class ModelController():
         # active setting
         set_active = True
         if statement.using is not None:
-            set_active = statement.using.pop(
-            if set_active in (
+            set_active = statement.using.pop("active", True)
+            if set_active in ("0", 0, None):
                 set_active = False
 
         params = self.prepare_create_statement(statement, ml_handler.database_controller)
 
         base_predictor_record = get_model_record(
-            name=params[
-            project_name=params['project_name'],
-            active=True
+            name=params["model_name"], project_name=params["project_name"], active=True
         )
 
-        model_name = params[
+        model_name = params["model_name"]
         if base_predictor_record is None:
             raise Exception(f"Error: model '{model_name}' does not exist")
 
-        if params[
-            params[
-        if params[
-            params[
+        if params["data_integration_ref"] is None:
+            params["data_integration_ref"] = base_predictor_record.data_integration_ref
+        if params["fetch_data_query"] is None:
+            params["fetch_data_query"] = base_predictor_record.fetch_data_query
 
         problem_definition = base_predictor_record.learn_args.copy()
-        problem_definition.update(params[
-        params[
+        problem_definition.update(params["problem_definition"])
+        params["problem_definition"] = problem_definition
 
-        params[
-        params[
+        params["is_retrain"] = True
+        params["set_active"] = set_active
         predictor_record = ml_handler.learn(**params)
 
         return ModelController.get_model_info(predictor_record)
@@ -350,23 +347,23 @@ class ModelController():
 
         set_active = True
         if statement.using is not None:
-            set_active = statement.using.pop(
-            if set_active in (
+            set_active = statement.using.pop("active", True)
+            if set_active in ("0", 0, None):
                 set_active = False
 
         label = None
         args = {}
         if statement.using is not None:
-            label = statement.using.pop(
+            label = statement.using.pop("tag", None)
             args = statement.using
 
-        join_learn_process = args.pop(
+        join_learn_process = args.pop("join_learn_process", False)
 
         base_predictor_record = get_model_record(
             name=model_name,
             project_name=project_name,
             version=model_version,
-            active=True if model_version is None else None
+            active=True if model_version is None else None,
         )
 
         if data_integration_ref is None:
@@ -383,7 +380,7 @@ class ModelController():
             args=args,
             join_learn_process=join_learn_process,
             label=label,
-            set_active=set_active
+            set_active=set_active,
         )
 
     @profiler.profile()
@@ -393,58 +390,71 @@ class ModelController():
         return ModelController.get_model_info(predictor_record)
 
     def update_model(self, session, project_name: str, model_name: str, problem_definition, version=None):
-
-        model_record = get_model_record(
-            name=model_name,
-            version=version,
-            project_name=project_name,
-            except_absent=True
-        )
+        model_record = get_model_record(name=model_name, version=version, project_name=project_name, except_absent=True)
         integration_record = db.Integration.query.get(model_record.integration_id)
 
         ml_handler_base = session.integration_controller.get_ml_handler(integration_record.name)
         ml_handler_base.update(args=problem_definition, model_id=model_record.id)
 
         # update model record
-        if
+        if "using" in problem_definition:
             learn_args = copy.deepcopy(model_record.learn_args)
-            learn_args[
+            learn_args["using"].update(problem_definition["using"])
             model_record.learn_args = learn_args
             db.session.commit()
 
     @staticmethod
     def get_model_info(predictor_record):
         from mindsdb.interfaces.database.projects import ProjectController
+
         projects_controller = ProjectController()
         project = projects_controller.get(id=predictor_record.project_id)
 
-        columns = [
-
+        columns = [
+            "NAME",
+            "ENGINE",
+            "PROJECT",
+            "ACTIVE",
+            "VERSION",
+            "STATUS",
+            "ACCURACY",
+            "PREDICT",
+            "UPDATE_STATUS",
+            "MINDSDB_VERSION",
+            "ERROR",
+            "SELECT_DATA_QUERY",
+            "TRAINING_OPTIONS",
+            "TAG",
+        ]
 
         project_name = project.name
         model = project.get_model_by_id(model_id=predictor_record.id)
-        table_name = model[
-        table_meta = model[
+        table_name = model["name"]
+        table_meta = model["metadata"]
         record = [
-            table_name,
-            table_meta[
-
-
+            table_name,
+            table_meta["engine"],
+            project_name,
+            table_meta["active"],
+            table_meta["version"],
+            table_meta["status"],
+            table_meta["accuracy"],
+            table_meta["predict"],
+            table_meta["update_status"],
+            table_meta["mindsdb_version"],
+            table_meta["error"],
+            table_meta["select_data_query"],
+            str(table_meta["training_options"]),
+            table_meta["label"],
         ]
 
         return pd.DataFrame([record], columns=columns)
 
     def set_model_active_version(self, project_name, model_name, version):
-
-        model_record = get_model_record(
-            name=model_name,
-            project_name=project_name,
-            version=version,
-            active=None
-        )
+        model_record = get_model_record(name=model_name, project_name=project_name, version=version, active=None)
 
         if model_record is None:
-            raise EntityNotExistsError(f
+            raise EntityNotExistsError(f"Model {model_name} with version {version} is not found in {project_name}")
 
         model_record.active = True
 
@@ -452,9 +462,9 @@ class ModelController():
         model_records = db.Predictor.query.filter(
             db.Predictor.name == model_record.name,
             db.Predictor.project_id == model_record.project_id,
-            db.Predictor.active == True,
+            db.Predictor.active == True,  # noqa
             db.Predictor.company_id == ctx.company_id,
-            db.Predictor.id != model_record.id
+            db.Predictor.id != model_record.id,
         )
         for p in model_records:
             p.active = False
@@ -462,20 +472,14 @@ class ModelController():
         db.session.commit()
 
     def delete_model_version(self, project_name, model_name, version):
-
-        model_record = get_model_record(
-            name=model_name,
-            project_name=project_name,
-            version=version,
-            active=None
-        )
+        model_record = get_model_record(name=model_name, project_name=project_name, version=version, active=None)
         if model_record is None:
-            raise EntityNotExistsError(f
+            raise EntityNotExistsError(f"Model {model_name} with version {version} is not found in {project_name}")
 
         if model_record.active:
             raise Exception(f"Can't remove active version: {project_name}.{model_name}.{version}")
 
-        is_cloud = self.config.get(
+        is_cloud = self.config.get("cloud", False)
         if is_cloud:
             model_record.deleted_at = dt.datetime.now()
         else:
mindsdb/interfaces/query_context/context_controller.py

@@ -7,7 +7,7 @@ import pandas as pd
 
 from mindsdb_sql_parser import Select, Star, OrderBy
 
-from mindsdb_sql_parser.ast import Identifier, BinaryOperation, Last, Constant, ASTNode
+from mindsdb_sql_parser.ast import Identifier, BinaryOperation, Last, Constant, ASTNode, Function
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from mindsdb.utilities.cache import get_cache
 
@@ -312,9 +312,21 @@ class QueryContextController:
         def replace_lasts(node, **kwargs):
             # find last in where
             if isinstance(node, BinaryOperation):
-
+                arg1, arg2 = node.args
+                if not isinstance(arg1, Identifier):
+                    arg1, arg2 = arg2, arg1
+
+                # one of the args must be identifier
+                if not isinstance(arg1, Identifier):
+                    return
+
+                # another must be LAST or function with LAST in args
+                if isinstance(arg2, Last) or (
+                    isinstance(arg2, Function) and any(isinstance(arg, Last) for arg in arg2.args)
+                ):
                     node.args = [Constant(0), Constant(0)]
                     node.op = "="
+            return node
 
         # find lasts
         query_traversal(query, replace_lasts)
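Read in isolation, the new matching rule in replace_lasts amounts to the predicate below; the helper name is hypothetical, but the checks restate the diff: one argument of the comparison must be an Identifier and the other must be LAST itself or a function call that has LAST among its arguments.

from mindsdb_sql_parser.ast import BinaryOperation, Function, Identifier, Last

def is_last_condition(node) -> bool:
    # Sketch of the condition that triggers the rewrite to the neutral "0 = 0" above.
    if not isinstance(node, BinaryOperation):
        return False
    arg1, arg2 = node.args
    if not isinstance(arg1, Identifier):
        arg1, arg2 = arg2, arg1
    if not isinstance(arg1, Identifier):
        return False
    return isinstance(arg2, Last) or (
        isinstance(arg2, Function) and any(isinstance(arg, Last) for arg in arg2.args)
    )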
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py

@@ -55,7 +55,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
 f"""\
 Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
 Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
-This system is a highly intelligent and reliable
+This system is a highly intelligent and reliable SQL skill designed to work with databases.
 Follow these instructions with utmost precision:
 1. Final Response Format:
 - Assume the frontend fully supports Markdown unless the user specifies otherwise.
@@ -73,7 +73,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
 - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
 5. Date Handling:
 - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
-- **Always** use
+- **Always** use `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
 - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
 - Do not compare date values without casting columns to date.
 - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
@@ -95,6 +95,8 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
 8. Identity and Purpose:
 - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
 - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
+9. Important: you can use only mysql quoting rules to compose queries: backticks (`) for identifiers, and single quotes (') for constants
+
 Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
 """
 )
@@ -110,7 +112,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             "If the query is correct, it will be parsed and returned. "
             f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
         )
-        mindsdb_sql_parser_tool = MindsDBSQLParserTool(
+        mindsdb_sql_parser_tool = MindsDBSQLParserTool(  # noqa: F841
            name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
         )
 
@@ -118,7 +120,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             query_sql_database_tool,
             info_sql_database_tool,
             list_sql_database_tool,
-            mindsdb_sql_parser_tool,
+            # mindsdb_sql_parser_tool,
         ]
         if not self.include_knowledge_base_tools:
             return sql_tools