MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +39 -20
- mindsdb/api/a2a/agent.py +7 -9
- mindsdb/api/a2a/common/server/server.py +3 -3
- mindsdb/api/a2a/common/server/task_manager.py +4 -4
- mindsdb/api/a2a/task_manager.py +15 -17
- mindsdb/api/common/middleware.py +9 -11
- mindsdb/api/executor/command_executor.py +2 -4
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +32 -16
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +8 -10
- mindsdb/api/http/namespaces/agents.py +10 -12
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +15 -4
- mindsdb/api/http/namespaces/handlers.py +7 -2
- mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +14 -8
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -1
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
- mindsdb/interfaces/database/integrations.py +19 -2
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -5
- mindsdb/interfaces/jobs/scheduler.py +3 -8
- mindsdb/interfaces/knowledge_base/controller.py +50 -23
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +170 -166
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -50
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +49 -0
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +7 -6
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/integrations/libs/process_cache.py:

```diff
@@ -1,5 +1,6 @@
 import time
 import threading
+import traceback
 from typing import Optional, Callable
 from concurrent.futures import ProcessPoolExecutor, Future
 
@@ -17,7 +18,7 @@ from mindsdb.integrations.libs.ml_handler_process import (
     create_engine_process,
     update_engine_process,
     create_validation_process,
-    func_call_process
+    func_call_process,
 )
 
 
```
```diff
@@ -44,11 +45,13 @@ class MLProcessException(Exception):
     If exception can not be pickled (pickle.loads(pickle.dumps(e))) then it may lead to termination of the ML process.
     Also in this case, the error sent to the user will not be relevant. This wrapper should prevent it.
     """
+
     base_exception_bytes: bytes = None
 
     def __init__(self, base_exception: Exception, message: str = None) -> None:
         super().__init__(message)
-
+        traceback_text = "\n".join(traceback.format_exception(base_exception))
+        self.message = f"{base_exception.__class__.__name__}: {base_exception}\n{traceback_text}"
 
     @property
     def base_exception(self) -> Exception:
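```

The docstring above explains the intent: if the wrapped exception cannot survive a pickle round-trip, the ML worker process can die and the user gets an unrelated error. A minimal, self-contained sketch of the same pattern (the `SafeProcessError` name and the pickle fallback are illustrative, not MindsDB's exact implementation):

```python
import pickle
import traceback


class SafeProcessError(Exception):
    """Illustrative stand-in: carry a worker exception across a pickle boundary."""

    def __init__(self, base_exception: Exception) -> None:
        # Flatten the original error to text immediately, so the message
        # survives even when the exception object itself is not picklable.
        tb = "\n".join(traceback.format_exception(base_exception))  # Python 3.10+ form
        super().__init__(f"{type(base_exception).__name__}: {base_exception}\n{tb}")
        try:
            self._exc_bytes = pickle.dumps(base_exception)
        except Exception:
            self._exc_bytes = None  # unpicklable: keep only the text form

    @property
    def base_exception(self) -> Exception | None:
        return pickle.loads(self._exc_bytes) if self._exc_bytes else None
```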
```diff
@@ -56,18 +59,19 @@ class MLProcessException(Exception):
 
 
 class WarmProcess:
-    """
-
-
-
-
+    """Class-wrapper for a process that persist for a long time. The process
+    may be initialized with any handler requirements. Current implimentation
+    is based on ProcessPoolExecutor just because of multiprocessing.pool
+    produce daemon processes, which can not be used for learning. That
+    bahaviour may be changed only using inheritance.
     """
+
    def __init__(self, initializer: Optional[Callable] = None, initargs: tuple = ()):
-        """
+        """create and init new process
 
-
-
-
+        Args:
+            initializer (Callable): the same as ProcessPoolExecutor initializer
+            initargs (tuple): the same as ProcessPoolExecutor initargs
         """
         self.pool = ProcessPoolExecutor(1, initializer=initializer, initargs=initargs)
         self.last_usage_at = time.time()
```
```diff
@@ -91,18 +95,17 @@ class WarmProcess:
         self.pool.shutdown(wait=wait)
 
     def _init_done_callback(self, _task):
-        """
-        """
+        """callback for initial task"""
         self._init_done = True
 
     def _update_last_usage_at_callback(self, _task):
         self.last_usage_at = time.time()
 
     def ready(self) -> bool:
-        """
+        """check is process ready to get a task or not
 
-
-
+        Returns:
+            bool
         """
         if self._init_done is False:
             self.task.result()
```
```diff
@@ -112,51 +115,49 @@ class WarmProcess:
         return False
 
     def add_marker(self, marker: tuple):
-        """
+        """remember that that process processed task for that model
 
-
-
+        Args:
+            marker (tuple): identifier of model
         """
         if marker is not None:
             self._markers.add(marker)
 
     def has_marker(self, marker: tuple) -> bool:
-        """
+        """check if that process processed task for model
 
-
-
+        Args:
+            marker (tuple): identifier of model
 
-
-
+        Returns:
+            bool
         """
         if marker is None:
             return False
         return marker in self._markers
 
     def is_marked(self) -> bool:
-        """
+        """check if process has any marker
 
-
-
+        Returns:
+            bool
         """
         return len(self._markers) > 0
 
     def apply_async(self, func: Callable, *args: tuple, **kwargs: dict) -> Future:
-        """
+        """Run new task
 
-
-
-
-
+        Args:
+            func (Callable): function to run
+            args (tuple): args to be passed to function
+            kwargs (dict): kwargs to be passed to function
 
-
-
+        Returns:
+            Future
         """
         if not self.ready():
-            raise Exception(
-        self.task = self.pool.submit(
-            func, *args, **kwargs
-        )
+            raise Exception("Process task is not ready")
+        self.task = self.pool.submit(func, *args, **kwargs)
         self.task.add_done_callback(self._update_last_usage_at_callback)
         self.last_usage_at = time.time()
         return self.task
```
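The restored docstring spells out why `WarmProcess` is built on `ProcessPoolExecutor`: `multiprocessing.pool` workers are daemonic and cannot spawn the child processes that learning needs. The core of the pattern is a single-worker pool that stays alive between tasks; a runnable sketch (the `init_worker` and `train` names are hypothetical stand-ins for `init_ml_handler` and the task functions):

```python
from concurrent.futures import ProcessPoolExecutor


def init_worker(tag: str) -> None:  # plays the role of init_ml_handler
    print(f"worker initialized: {tag}")


def train(x: int) -> int:  # hypothetical task
    return x * x


if __name__ == "__main__":
    # One long-lived, non-daemon worker: initialization cost is paid once,
    # then the same warm process serves every subsequent submit.
    pool = ProcessPoolExecutor(1, initializer=init_worker, initargs=("lightwood",))
    print(pool.submit(train, 3).result())  # 9
    print(pool.submit(train, 4).result())  # 16, same warm process
    pool.shutdown()
```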
```diff
@@ -173,11 +174,11 @@ def warm_function(func, context: str, *args, **kwargs):
 
 
 class ProcessCache:
-    """
-
+    """simple cache for WarmProcess-es"""
+
     def __init__(self, ttl: int = 120):
-        """
-
+        """Args:
+        ttl (int) time to live for unused process
         """
         self.cache = {}
         self._init = False
```
```diff
@@ -191,42 +192,37 @@ class ProcessCache:
         self._stop_clean()
 
     def _start_clean(self) -> None:
-        """
-
-        if (
-            isinstance(self.cleaner_thread, threading.Thread)
-            and self.cleaner_thread.is_alive()
-        ):
+        """start worker that close connections after ttl expired"""
+        if isinstance(self.cleaner_thread, threading.Thread) and self.cleaner_thread.is_alive():
             return
         self._stop_event.clear()
-        self.cleaner_thread = threading.Thread(target=self._clean, name=
+        self.cleaner_thread = threading.Thread(target=self._clean, name="ProcessCache.clean")
         self.cleaner_thread.daemon = True
         self.cleaner_thread.start()
 
     def _stop_clean(self) -> None:
-        """
-        """
+        """stop clean worker"""
         self._stop_event.set()
 
     def init(self):
-        """
-        """
+        """run processes for specified handlers"""
         from mindsdb.interfaces.database.integrations import integration_controller
+
         preload_handlers = {}
         config = Config()
-        is_cloud = config.get(
+        is_cloud = config.get("cloud", False)  # noqa
 
-        if config[
+        if config["ml_task_queue"]["type"] != "redis":
             if is_cloud:
-                lightwood_handler = integration_controller.get_handler_module(
+                lightwood_handler = integration_controller.get_handler_module("lightwood")
                 if lightwood_handler is not None and lightwood_handler.Handler is not None:
                     preload_handlers[lightwood_handler.Handler] = 4 if is_cloud else 1
 
-                huggingface_handler = integration_controller.get_handler_module(
+                huggingface_handler = integration_controller.get_handler_module("huggingface")
                 if huggingface_handler is not None and huggingface_handler.Handler is not None:
                     preload_handlers[huggingface_handler.Handler] = 1
 
-                openai_handler = integration_controller.get_handler_module(
+                openai_handler = integration_controller.get_handler_module("openai")
                 if openai_handler is not None and openai_handler.Handler is not None:
                     preload_handlers[openai_handler.Handler] = 1
 
```
```diff
@@ -236,146 +232,144 @@ class ProcessCache:
         for handler in preload_handlers:
             self._keep_alive[handler.name] = preload_handlers[handler]
             self.cache[handler.name] = {
-
-
-
+                "last_usage_at": time.time(),
+                "handler_module": handler.__module__,
+                "processes": [
                     WarmProcess(init_ml_handler, (handler.__module__,))
                     for _x in range(preload_handlers[handler])
-                ]
+                ],
             }
 
-    def apply_async(
-
-
+    def apply_async(
+        self, task_type: ML_TASK_TYPE, model_id: Optional[int], payload: dict, dataframe: Optional[DataFrame] = None
+    ) -> Future:
+        """run new task. If possible - do it in existing process, if not - start new one.
 
-
-
-
-
-
+        Args:
+            task_type (ML_TASK_TYPE): type of the task (learn, predict, etc)
+            model_id (int): id of the model
+            payload (dict): any 'lightweight' data that needs to be send in the process
+            dataframe (DataFrame): DataFrame to be send in the process
 
-
-
+        Returns:
+            Future
         """
         self._start_clean()
-        handler_module_path = payload[
-        integration_id = payload[
+        handler_module_path = payload["handler_meta"]["module_path"]
+        integration_id = payload["handler_meta"]["integration_id"]
         if task_type in (ML_TASK_TYPE.LEARN, ML_TASK_TYPE.FINETUNE):
             func = learn_process
             kwargs = {
-
-
-
-
-
-
-
-
-
+                "data_integration_ref": payload["data_integration_ref"],
+                "problem_definition": payload["problem_definition"],
+                "fetch_data_query": payload["fetch_data_query"],
+                "project_name": payload["project_name"],
+                "model_id": model_id,
+                "base_model_id": payload.get("base_model_id"),
+                "set_active": payload["set_active"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.PREDICT:
             func = predict_process
             kwargs = {
-
-
-
-
-
-
+                "predictor_record": payload["predictor_record"],
+                "ml_engine_name": payload["handler_meta"]["engine"],
+                "args": payload["args"],
+                "dataframe": dataframe,
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.DESCRIBE:
             func = describe_process
             kwargs = {
-
-
-
-
+                "attribute": payload.get("attribute"),
+                "model_id": model_id,
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.CREATE_VALIDATION:
             func = create_validation_process
             kwargs = {
-
-
-
-
+                "target": payload.get("target"),
+                "args": payload.get("args"),
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.CREATE_ENGINE:
             func = create_engine_process
             kwargs = {
-
-
-
+                "connection_args": payload["connection_args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.UPDATE_ENGINE:
             func = update_engine_process
             kwargs = {
-
-
-
+                "connection_args": payload["connection_args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.UPDATE:
             func = update_process
             kwargs = {
-
-
-
-
+                "args": payload["args"],
+                "integration_id": integration_id,
+                "model_id": model_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.FUNC_CALL:
             func = func_call_process
             kwargs = {
-
-
-
-
+                "name": payload["name"],
+                "args": payload["args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         else:
-            raise Exception(f
+            raise Exception(f"Unknown ML task type: {task_type}")
 
-        ml_engine_name = payload[
-        model_marker = (model_id, payload[
+        ml_engine_name = payload["handler_meta"]["engine"]
+        model_marker = (model_id, payload["context"]["company_id"])
         with self._lock:
             if ml_engine_name not in self.cache:
                 warm_process = WarmProcess(init_ml_handler, (handler_module_path,))
                 self.cache[ml_engine_name] = {
-
-
-
+                    "last_usage_at": None,
+                    "handler_module": handler_module_path,
+                    "processes": [warm_process],
                 }
             else:
                 warm_process = None
                 if model_marker is not None:
                     try:
                         warm_process = next(
-                            p
+                            p
+                            for p in self.cache[ml_engine_name]["processes"]
                             if p.ready() and p.has_marker(model_marker)
                         )
                     except StopIteration:
                         pass
                 if warm_process is None:
                     try:
-                        warm_process = next(
-                            p for p in self.cache[ml_engine_name]['processes']
-                            if p.ready()
-                        )
+                        warm_process = next(p for p in self.cache[ml_engine_name]["processes"] if p.ready())
                     except StopIteration:
                         pass
                 if warm_process is None:
                     warm_process = WarmProcess(init_ml_handler, (handler_module_path,))
-                    self.cache[ml_engine_name][
+                    self.cache[ml_engine_name]["processes"].append(warm_process)
 
-            task = warm_process.apply_async(warm_function, func, payload[
-            self.cache[ml_engine_name][
+            task = warm_process.apply_async(warm_function, func, payload["context"], **kwargs)
+            self.cache[ml_engine_name]["last_usage_at"] = time.time()
             warm_process.add_marker(model_marker)
             return task
 
     def _clean(self) -> None:
-        """
-        """
+        """worker that stop unused processes"""
         while self._stop_event.wait(timeout=10) is False:
             with self._lock:
                 for handler_name in self.cache.keys():
-                    processes = self.cache[handler_name][
+                    processes = self.cache[handler_name]["processes"]
                     processes.sort(key=lambda x: x.is_marked())
 
                     expected_count = 0
```
```diff
@@ -395,9 +389,7 @@ class ProcessCache:
                             break
 
                     while expected_count > len(processes):
-                        processes.append(
-                            WarmProcess(init_ml_handler, (self.cache[handler_name]['handler_module'],))
-                        )
+                        processes.append(WarmProcess(init_ml_handler, (self.cache[handler_name]["handler_module"],)))
 
     def shutdown(self, wait: bool = True) -> None:
         """Call 'shutdown' for each process cache
```
```diff
@@ -406,25 +398,25 @@ class ProcessCache:
         """
         with self._lock:
             for handler_name in self.cache:
-                for process in self.cache[handler_name][
+                for process in self.cache[handler_name]["processes"]:
                     process.shutdown(wait=wait)
-                self.cache[handler_name][
+                self.cache[handler_name]["processes"] = []
 
     def remove_processes_for_handler(self, handler_name: str) -> None:
         """
-
-
-
+        Remove all warm processes for a given handler.
+        This is useful when the previous processes use an outdated instance of the handler.
+        A good example is when the dependencies for a handler are installed after attempting to use the handler.
 
-
-
+        Args:
+            handler_name (str): name of the handler.
         """
         with self._lock:
             if handler_name in self.cache:
-                for process in self.cache[handler_name][
+                for process in self.cache[handler_name]["processes"]:
                     process.shutdown()
 
-                self.cache[handler_name][
+                self.cache[handler_name]["processes"] = []
 
 
 process_cache = ProcessCache()
```
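The reformatted `apply_async` body makes the worker-selection order easier to read: prefer a ready process that already served this `(model_id, company_id)` marker, then any ready process, and only then spawn a new `WarmProcess`. A sketch of that policy in isolation (function name is hypothetical; `has_marker(None)` returns `False`, so unmarked tasks fall straight through to the second pass):

```python
from typing import Optional


def pick_warm_process(processes: list, marker: Optional[tuple]):
    """Return the preferred ready process, or None if the caller must spawn one."""
    for p in processes:
        # First pass: a warm process that already loaded this model.
        if p.ready() and p.has_marker(marker):
            return p
    for p in processes:
        # Second pass: any idle process will do.
        if p.ready():
            return p
    return None  # caller creates a new WarmProcess and appends it to the cache
```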
mindsdb/integrations/libs/response.py:

```diff
@@ -1,3 +1,4 @@
+import sys
 from typing import Callable
 from dataclasses import dataclass, fields
 
```
```diff
@@ -41,9 +42,15 @@ INF_SCHEMA_COLUMNS_NAMES_SET = set(f.name for f in fields(INF_SCHEMA_COLUMNS_NAM
 
 class HandlerResponse:
     def __init__(
-
-
-
+        self,
+        resp_type: RESPONSE_TYPE,
+        data_frame: pandas.DataFrame = None,
+        query: ASTNode = 0,
+        error_code: int = 0,
+        error_message: str | None = None,
+        affected_rows: int | None = None,
+        mysql_types: list[MYSQL_DATA_TYPE] | None = None,
+        is_acceptable_error: bool = False,
     ) -> None:
         self.resp_type = resp_type
         self.query = query
```
```diff
@@ -54,6 +61,11 @@ class HandlerResponse:
         if isinstance(self.affected_rows, int) is False or self.affected_rows < 0:
             self.affected_rows = 0
         self.mysql_types = mysql_types
+        self.is_acceptable_error = is_acceptable_error
+        self.exception = None
+        current_exception = sys.exc_info()
+        if current_exception[0] is not None:
+            self.exception = current_exception[1]
 
     @property
     def type(self):
```
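The new lines snapshot any in-flight exception at construction time, so a response built inside an `except` block keeps a reference to the original error object. A runnable illustration of the `sys.exc_info()` behaviour (`MiniResponse` is a stand-in, not the real class):

```python
import sys


class MiniResponse:  # minimal stand-in for HandlerResponse
    def __init__(self, error_message: str | None = None) -> None:
        self.error_message = error_message
        self.exception = None
        exc_type, exc_value, _tb = sys.exc_info()
        if exc_type is not None:  # non-None only while an except block is active
            self.exception = exc_value


try:
    1 / 0
except ZeroDivisionError:
    resp = MiniResponse(error_message="query failed")

print(type(resp.exception).__name__)  # ZeroDivisionError
```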
```diff
@@ -71,9 +83,7 @@ class HandlerResponse:
                 f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}, "
                 f"the error is: {self.error_message}"
             )
-        raise ValueError(
-            f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}"
-        )
+        raise ValueError(f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}")
 
         self.data_frame.columns = [name.upper() for name in self.data_frame.columns]
         self.data_frame[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self.data_frame[
```
```diff
@@ -83,9 +93,7 @@ class HandlerResponse:
         # region validate df
         current_columns_set = set(self.data_frame.columns)
         if INF_SCHEMA_COLUMNS_NAMES_SET != current_columns_set:
-            raise ValueError(
-                f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}"
-            )
+            raise ValueError(f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}")
         # endregion
 
         self.data_frame = self.data_frame.astype(
```
```diff
@@ -112,9 +120,7 @@ class HandlerResponse:
         try:
             data = None
             if self.data_frame is not None:
-                data = self.data_frame.to_json(
-                    orient="split", index=False, date_format="iso"
-                )
+                data = self.data_frame.to_json(orient="split", index=False, date_format="iso")
         except Exception as e:
             logger.error("%s.to_json: error - %s", self.__class__.__name__, e)
             data = None
```
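The joined line keeps the serialization unchanged: `orient="split"` emits column names and row data separately, and `index=False` is only accepted for the "split" and "table" orients. A quick demonstration of the output shape:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
print(df.to_json(orient="split", index=False, date_format="iso"))
# {"columns":["a","b"],"data":[[1,"x"],[2,"y"]]}
```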
mindsdb/integrations/libs/vectordatabase_handler.py:

```diff
@@ -593,6 +593,32 @@ class VectorStoreHandler(BaseHandler):
         """
         raise NotImplementedError(f"Hybrid search not supported for VectorStoreHandler {self.name}")
 
+    def check_existing_ids(self, table_name: str, ids: List[str]) -> List[str]:
+        """
+        Check which IDs from the provided list already exist in the table.
+
+        Args:
+            table_name (str): Name of the table to check
+            ids (List[str]): List of IDs to check for existence
+
+        Returns:
+            List[str]: List of IDs that already exist in the table
+        """
+        if not ids:
+            return []
+
+        try:
+            # Query existing IDs
+            df_existing = self.select(
+                table_name,
+                columns=[TableField.ID.value],
+                conditions=[FilterCondition(column=TableField.ID.value, op=FilterOperator.IN, value=ids)],
+            )
+            return list(df_existing[TableField.ID.value]) if not df_existing.empty else []
+        except Exception:
+            # If select fails for any reason, return empty list to be safe
+            return []
+
     def create_index(self, *args, **kwargs):
         """
         Create an index on the specified table.
```
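The new `check_existing_ids` method lets callers deduplicate before inserting: filter a candidate ID list down to those already stored, then insert only the rest. A self-contained usage sketch with a hypothetical in-memory stand-in for a real handler:

```python
from typing import List


class InMemoryVectorStore:
    """Hypothetical stand-in mirroring check_existing_ids semantics."""

    def __init__(self, stored_ids: set) -> None:
        self._ids = stored_ids

    def check_existing_ids(self, table_name: str, ids: List[str]) -> List[str]:
        # Real handlers run a SELECT with an IN-condition on the id column.
        return [i for i in ids if i in self._ids]


store = InMemoryVectorStore({"a1"})
rows = [{"id": "a1"}, {"id": "b2"}]
existing = set(store.check_existing_ids("kb_chunks", [r["id"] for r in rows]))
print([r for r in rows if r["id"] not in existing])  # [{'id': 'b2'}] -- only unseen ids get inserted
```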
mindsdb/integrations/utilities/files/file_reader.py:

```diff
@@ -1,17 +1,16 @@
-from dataclasses import dataclass, astuple
-import traceback
-import json
 import csv
-from io import BytesIO, StringIO, IOBase
-from pathlib import Path
+import json
 import codecs
+from io import BytesIO, StringIO, IOBase
 from typing import List, Generator
+from pathlib import Path
+from dataclasses import dataclass, astuple
 
 import filetype
 import pandas as pd
 from charset_normalizer import from_bytes
-from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 
+from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 from mindsdb.utilities import log
 
 logger = log.getLogger(__name__)
```
```diff
@@ -76,7 +75,7 @@ def decode(file_obj: IOBase) -> StringIO:
 
             data_str = StringIO(byte_str.decode(encoding, errors))
         except Exception as e:
-            logger.
+            logger.exception("Error during file decode:")
             raise FileProcessingError("Could not load into string") from e
 
         return data_str
```
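For context on what `decode` is doing around this hunk: the file's raw bytes are run through `charset_normalizer` to guess an encoding before building the `StringIO`. A rough, self-contained sketch of that flow under those assumptions (`decode_bytes` is an illustrative name, and the error policy here is simplified relative to the real function):

```python
from io import BytesIO, StringIO

from charset_normalizer import from_bytes


def decode_bytes(file_obj) -> StringIO:
    """Detect the encoding of raw bytes, then return the text in a StringIO."""
    byte_str = file_obj.read()
    best = from_bytes(byte_str).best()  # charset detection; None if undecided
    encoding = best.encoding if best is not None else "utf-8"
    return StringIO(byte_str.decode(encoding, errors="replace"))


print(decode_bytes(BytesIO("café".encode("latin-1"))).read())
```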