MindsDB 25.9.1.2__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +39 -20
- mindsdb/api/a2a/agent.py +7 -9
- mindsdb/api/a2a/common/server/server.py +3 -3
- mindsdb/api/a2a/common/server/task_manager.py +4 -4
- mindsdb/api/a2a/task_manager.py +15 -17
- mindsdb/api/common/middleware.py +9 -11
- mindsdb/api/executor/command_executor.py +2 -4
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +32 -16
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +8 -10
- mindsdb/api/http/namespaces/agents.py +10 -12
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +15 -4
- mindsdb/api/http/namespaces/handlers.py +7 -2
- mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +14 -8
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +11 -5
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +20 -2
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +18 -3
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -1
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
- mindsdb/interfaces/database/integrations.py +19 -2
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -5
- mindsdb/interfaces/jobs/scheduler.py +3 -8
- mindsdb/interfaces/knowledge_base/controller.py +54 -25
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +170 -166
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/storage/model_fs.py +54 -92
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -50
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +49 -0
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +8 -7
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +266 -261
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +119 -119
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py

@@ -17,7 +17,6 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
     TableField,
     VectorStoreHandler,
 )
-from mindsdb.interfaces.storage.model_fs import HandlerStorage
 from mindsdb.utilities import log
 
 logger = log.getLogger(__name__)
@@ -55,7 +54,7 @@ class ChromaDBHandler(VectorStoreHandler):
 
     def __init__(self, name: str, **kwargs):
         super().__init__(name)
-        self.handler_storage =
+        self.handler_storage = kwargs["handler_storage"]
         self._client = None
         self.persist_directory = None
         self.is_connected = False
@@ -73,8 +72,6 @@ class ChromaDBHandler(VectorStoreHandler):
             "hnsw:space": config.distance,
         }
 
-        self.connect()
-
     def validate_connection_parameters(self, name, **kwargs):
         """
         Validate the connection parameters.
@@ -88,7 +85,7 @@ class ChromaDBHandler(VectorStoreHandler):
         if config.persist_directory:
            if os.path.isabs(config.persist_directory):
                self.persist_directory = config.persist_directory
-
+            else:
                # get full persistence directory from handler storage
                self.persist_directory = self.handler_storage.folder_get(config.persist_directory)
                self._use_handler_storage = True
@@ -149,6 +146,7 @@ class ChromaDBHandler(VectorStoreHandler):
         need_to_close = self.is_connected is False
 
         try:
+            self.connect()
             self._client.heartbeat()
             response_code.success = True
         except Exception as e:
@@ -233,6 +231,7 @@
         offset: int = None,
         limit: int = None,
     ) -> pd.DataFrame:
+        self.connect()
         collection = self._client.get_collection(table_name)
         filters = self._translate_metadata_condition(conditions)
 
@@ -399,6 +398,7 @@
         Insert/Upsert data into ChromaDB collection.
         If records with same IDs exist, they will be updated.
         """
+        self.connect()
         collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata)
 
         # Convert metadata from string to dict if needed
@@ -449,6 +449,7 @@
         """
         Update data in the ChromaDB database.
         """
+        self.connect()
         collection = self._client.get_collection(table_name)
 
         # drop columns with all None values
@@ -466,6 +467,7 @@
         self._sync()
 
     def delete(self, table_name: str, conditions: List[FilterCondition] = None):
+        self.connect()
         filters = self._translate_metadata_condition(conditions)
         # get id filters
         id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] or None
@@ -480,6 +482,7 @@
         """
         Create a collection with the given name in the ChromaDB database.
         """
+        self.connect()
         self._client.create_collection(
             table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata
         )
@@ -489,6 +492,7 @@
         """
         Delete a collection from the ChromaDB database.
         """
+        self.connect()
         try:
             self._client.delete_collection(table_name)
             self._sync()
@@ -502,6 +506,7 @@
         """
         Get the list of collections in the ChromaDB database.
         """
+        self.connect()
         collections = self._client.list_collections()
         collections_name = pd.DataFrame(
             columns=["table_name"],
@@ -511,6 +516,7 @@
 
     def get_columns(self, table_name: str) -> HandlerResponse:
         # check if collection exists
+        self.connect()
         try:
             _ = self._client.get_collection(table_name)
         except ValueError:
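
Most of the ChromaDB hunks above follow one pattern: the eager self.connect() in __init__ is removed and each data operation calls self.connect() itself. Below is a minimal, self-contained sketch of that lazy-connect pattern; _DummyClient and LazyVectorStore are illustrative stand-ins, not the handler's real classes.

    class _DummyClient:
        """Stand-in for a real chromadb client so the sketch runs on its own."""
        def __init__(self, path):
            self.path = path

        def list_collections(self):
            return []

    class LazyVectorStore:
        def __init__(self, persist_directory: str):
            # no connection here: the client is created on first use
            self.persist_directory = persist_directory
            self._client = None
            self.is_connected = False

        def connect(self):
            # idempotent: reuse an existing client, otherwise create one
            if not self.is_connected or self._client is None:
                self._client = _DummyClient(self.persist_directory)
                self.is_connected = True
            return self._client

        def get_tables(self):
            self.connect()  # every public operation ensures a live connection first
            return self._client.list_collections()

    print(LazyVectorStore("/tmp/vectors").get_tables())  # []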

mindsdb/integrations/handlers/file_handler/file_handler.py

@@ -50,6 +50,7 @@ class FileHandler(DatabaseHandler):
         self.chunk_size = connection_data.get("chunk_size", DEFAULT_CHUNK_SIZE)
         self.chunk_overlap = connection_data.get("chunk_overlap", DEFAULT_CHUNK_OVERLAP)
         self.file_controller = file_controller
+        self.thread_safe = True
 
     def connect(self, **kwargs):
         return
@@ -83,6 +84,12 @@ class FileHandler(DatabaseHandler):
         table_name = table_identifier.parts[-1]
         try:
             self.file_controller.delete_file(table_name)
+        except FileNotFoundError as e:
+            if not query.if_exists:
+                return Response(
+                    RESPONSE_TYPE.ERROR,
+                    error_message=f"Can't delete table '{table_name}': {e}",
+                )
         except Exception as e:
             return Response(
                 RESPONSE_TYPE.ERROR,
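
The second FileHandler hunk makes DROP TABLE ... IF EXISTS tolerant of a missing file: FileNotFoundError only becomes an error response when if_exists was not requested. A small self-contained sketch of that decision (plain strings stand in for MindsDB's Response objects):

    import os

    def drop_file_table(path: str, if_exists: bool) -> str:
        """Delete the file backing a 'table'; error only if IF EXISTS was not used."""
        try:
            os.remove(path)
        except FileNotFoundError as e:
            if not if_exists:
                return f"error: can't delete table '{path}': {e}"
        return "ok"

    print(drop_file_table("/tmp/no_such_table.csv", if_exists=True))   # ok
    print(drop_file_table("/tmp/no_such_table.csv", if_exists=False))  # error: ...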

mindsdb/integrations/handlers/lightwood_handler/functions.py

@@ -27,51 +27,40 @@ logger = log.getLogger(__name__)
 
 
 def create_learn_mark():
-    if os.name ==
-        p = Path(tempfile.gettempdir()).joinpath(
+    if os.name == "posix":
+        p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/")
         p.mkdir(parents=True, exist_ok=True)
-        p.joinpath(f
+        p.joinpath(f"{os.getpid()}").touch()
 
 
 def delete_learn_mark():
-    if os.name ==
-        p = (
-            Path(tempfile.gettempdir())
-            .joinpath('mindsdb/learn_processes/')
-            .joinpath(f'{os.getpid()}')
-        )
+    if os.name == "posix":
+        p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/").joinpath(f"{os.getpid()}")
         if p.exists():
             p.unlink()
 
 
-@mark_process(name=
+@mark_process(name="learn")
 @profiler.profile()
 def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = None):
+    model_storage.training_state_set(current_state_num=1, total_states=5, state_name="Generating problem definition")
+    json_ai_override = args.pop("using", {})
 
-
-
-    )
-    json_ai_override = args.pop('using', {})
-
-    if 'dtype_dict' in json_ai_override:
-        args['dtype_dict'] = json_ai_override.pop('dtype_dict')
+    if "dtype_dict" in json_ai_override:
+        args["dtype_dict"] = json_ai_override.pop("dtype_dict")
 
-    if
-        args = {**args, **json_ai_override[
+    if "problem_definition" in json_ai_override:
+        args = {**args, **json_ai_override["problem_definition"]}
 
-    if
-        for tss_key in [
-
-        ]:
-            k = f'timeseries_settings.{tss_key}'
+    if "timeseries_settings" in args:
+        for tss_key in [f.name for f in dataclasses.fields(lightwood.api.TimeseriesSettings)]:
+            k = f"timeseries_settings.{tss_key}"
             if k in json_ai_override:
-                args[
+                args["timeseries_settings"][tss_key] = json_ai_override.pop(k)
 
     problem_definition = lightwood.ProblemDefinition.from_dict(args)
 
-    model_storage.training_state_set(
-        current_state_num=2, total_states=5, state_name='Generating JsonAI'
-    )
+    model_storage.training_state_set(current_state_num=2, total_states=5, state_name="Generating JsonAI")
     json_ai = lightwood.json_ai_from_problem(df, problem_definition)
     json_ai = json_ai.to_dict()
     unpack_jsonai_old_args(json_ai_override)
@@ -79,9 +68,7 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
     rep_recur(json_ai, json_ai_override)
     json_ai = JsonAI.from_dict(json_ai)
 
-    model_storage.training_state_set(
-        current_state_num=3, total_states=5, state_name='Generating code'
-    )
+    model_storage.training_state_set(current_state_num=3, total_states=5, state_name="Generating code")
     code = lightwood.code_from_json_ai(json_ai)
 
     predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
@@ -89,33 +76,27 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
     db.session.commit()
 
     json_storage = get_json_storage(resource_id=predictor_id)
-    json_storage.set(
+    json_storage.set("json_ai", json_ai.to_dict())
 
 
-@mark_process(name=
+@mark_process(name="learn")
 @profiler.profile()
 def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
     try:
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
         assert predictor_record is not None
 
-        predictor_record.data = {
+        predictor_record.data = {"training_log": "training"}
         predictor_record.status = PREDICTOR_STATUS.TRAINING
         db.session.commit()
 
-        model_storage.training_state_set(
-
-        )
-        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(
-            predictor_record.code
-        )
+        model_storage.training_state_set(current_state_num=4, total_states=5, state_name="Training model")
+        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
         predictor.learn(df)
 
         db.session.refresh(predictor_record)
 
-        fs = FileStorage(
-            resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
-        )
+        fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
         predictor.save(fs.folder_path / fs.folder_name)
         fs.push(compression_level=0)
 
@@ -124,9 +105,7 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
         # getting training time for each tried model. it is possible to do
         # after training only
         fit_mixers = list(
-            predictor.runtime_log[x]
-            for x in predictor.runtime_log
-            if isinstance(x, tuple) and x[0] == "fit_mixer"
+            predictor.runtime_log[x] for x in predictor.runtime_log if isinstance(x, tuple) and x[0] == "fit_mixer"
         )
         submodel_data = predictor_record.data.get("submodel_data", [])
         # add training time to other mixers info
@@ -135,43 +114,39 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
                 submodel_data[i]["training_time"] = tr_time
         predictor_record.data["submodel_data"] = submodel_data
 
-        model_storage.training_state_set(
-            current_state_num=5, total_states=5, state_name='Complete'
-        )
+        model_storage.training_state_set(current_state_num=5, total_states=5, state_name="Complete")
         predictor_record.dtype_dict = predictor.dtype_dict
         db.session.commit()
     except Exception as e:
         db.session.refresh(predictor_record)
-        predictor_record.data = {
+        predictor_record.data = {"error": f"{traceback.format_exc()}\nMain error: {e}"}
         db.session.commit()
         raise e
 
 
-@mark_process(name=
+@mark_process(name="learn")
 def run_learn_remote(df: DataFrame, predictor_id: int) -> None:
     try:
         serialized_df = json.dumps(df.to_dict())
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
         resp = requests.post(
-            predictor_record.data[
-            json={
+            predictor_record.data["train_url"],
+            json={"df": serialized_df, "target": predictor_record.to_predict[0]},
         )
 
         assert resp.status_code == 200
-        predictor_record.data[
+        predictor_record.data["status"] = "complete"
     except Exception:
-        predictor_record.data[
-        predictor_record.data[
+        predictor_record.data["status"] = "error"
+        predictor_record.data["error"] = str(resp.text)
 
     db.session.commit()
 
 
-@mark_process(name=
+@mark_process(name="learn")
 def run_learn(df: DataFrame, args: dict, model_storage) -> None:
     if df is None or df.shape[0] == 0:
-        raise Exception(
-            'No input data. Ensure the data source is healthy and try again.'
-        )
+        raise Exception("No input data. Ensure the data source is healthy and try again.")
 
     predictor_id = model_storage.predictor_id
 
@@ -187,15 +162,13 @@ def run_learn(df: DataFrame, args: dict, model_storage) -> None:
     db.session.commit()
 
 
-@mark_process(name=
+@mark_process(name="finetune")
 def run_finetune(df: DataFrame, args: dict, model_storage):
     try:
         if df is None or df.shape[0] == 0:
-            raise Exception(
-                'No input data. Ensure the data source is healthy and try again.'
-            )
+            raise Exception("No input data. Ensure the data source is healthy and try again.")
 
-        base_predictor_id = args[
+        base_predictor_id = args["base_model_id"]
         base_predictor_record = db.Predictor.query.get(base_predictor_id)
         if base_predictor_record.status != PREDICTOR_STATUS.COMPLETE:
             raise Exception("Base model must be in status 'complete'")
@@ -204,11 +177,9 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
         predictor_record = db.Predictor.query.get(predictor_id)
 
         # TODO move this to ModelStorage (don't work with database directly)
-        predictor_record.data = {
+        predictor_record.data = {"training_log": "training"}
         predictor_record.training_start_at = datetime.now()
-        predictor_record.status =
-            PREDICTOR_STATUS.FINETUNING
-        )  # TODO: parallel execution block
+        predictor_record.status = PREDICTOR_STATUS.FINETUNING  # TODO: parallel execution block
         db.session.commit()
 
         base_fs = FileStorage(
@@ -219,28 +190,23 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
         predictor = lightwood.predictor_from_state(
             base_fs.folder_path / base_fs.folder_name, base_predictor_record.code
         )
-        predictor.adjust(df, adjust_args=args.get(
+        predictor.adjust(df, adjust_args=args.get("using", {}))
 
-        fs = FileStorage(
-            resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
-        )
+        fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
         predictor.save(fs.folder_path / fs.folder_name)
         fs.push(compression_level=0)
 
-        predictor_record.data = (
-            predictor.model_analysis.to_dict()
-        )  # todo: update accuracy in LW as post-finetune hook
+        predictor_record.data = predictor.model_analysis.to_dict()  # todo: update accuracy in LW as post-finetune hook
         predictor_record.code = base_predictor_record.code
-        predictor_record.update_status =
+        predictor_record.update_status = "up_to_date"
         predictor_record.status = PREDICTOR_STATUS.COMPLETE
         predictor_record.training_stop_at = datetime.now()
         db.session.commit()
 
     except Exception as e:
-        logger.error(
+        logger.error("Unexpected error during Lightwood model finetune:", exc_info=True)
         predictor_id = model_storage.predictor_id
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
-        logger.error(traceback.format_exc())
         error_message = format_exception_error(e)
         predictor_record.data = {"error": error_message}
         predictor_record.status = PREDICTOR_STATUS.ERROR
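
Besides the quote-style cleanup, the Lightwood hunks add a first training_state_set call so all five stages of training report progress (problem definition, JsonAI, code generation, training, complete). A rough sketch of that staged-progress idea, with a hypothetical report function standing in for model_storage.training_state_set:

    def report(current_state_num: int, total_states: int, state_name: str) -> None:
        # stand-in for model_storage.training_state_set(...)
        print(f"[{current_state_num}/{total_states}] {state_name}")

    def train_model(df=None):
        report(1, 5, "Generating problem definition")
        report(2, 5, "Generating JsonAI")
        report(3, 5, "Generating code")
        report(4, 5, "Training model")
        # ... the real pipeline does the corresponding work between these calls ...
        report(5, 5, "Complete")

    train_model()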

mindsdb/integrations/handlers/openai_handler/openai_handler.py

@@ -114,7 +114,7 @@ class OpenAIHandler(BaseMLEngine):
         except NotFoundError:
             pass
         except AuthenticationError as e:
-            if e.body
+            if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key":
                 raise Exception("Invalid api key")
             raise Exception(f"Something went wrong: {e}")
 
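
The OpenAI fix guards against a non-dict e.body before reading its code field. A tiny sketch of that defensive check; FakeAuthError is a stand-in, not the real openai AuthenticationError:

    class FakeAuthError(Exception):
        def __init__(self, body):
            super().__init__(str(body))
            self.body = body

    def describe(e: FakeAuthError) -> str:
        # only trust body["code"] when body really is a dict
        if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key":
            return "Invalid api key"
        return f"Something went wrong: {e}"

    print(describe(FakeAuthError({"code": "invalid_api_key"})))  # Invalid api key
    print(describe(FakeAuthError("plain text body")))            # Something went wrong: ...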

mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py

@@ -17,7 +17,9 @@ from mindsdb_sql_parser.ast import (
     Delete,
     Update,
     Function,
+    DropTables,
 )
+from mindsdb_sql_parser.ast.base import ASTNode
 from pgvector.psycopg import register_vector
 
 from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -116,9 +118,22 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
             return Response(RESPONSE_TYPE.OK)
         return super().get_tables()
 
-    def
+    def query(self, query: ASTNode) -> Response:
+        # Option to drop table of shared pgvector connection
+        if isinstance(query, DropTables):
+            query.tables = [self._check_table(table.parts[-1]) for table in query.tables]
+            query_str, params = self.renderer.get_exec_params(query, with_failback=True)
+            return self.native_query(query_str, params, no_restrict=True)
+        return super().query(query)
+
+    def native_query(self, query, params=None, no_restrict=False) -> Response:
+        """
+        Altered `native_query` method of postgres handler.
+        Restrict usage of native query from executor with shared pg vector connection
+        Exceptions: if it is used by pgvector itself (with no_restrict = True)
+        """
         # Prevent execute native queries
-        if self._is_shared_db:
+        if self._is_shared_db and not no_restrict:
             return Response(RESPONSE_TYPE.OK)
         return super().native_query(query, params=params)
 
@@ -550,6 +565,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
 
     def create_table(self, table_name: str):
         """Create a table with a vector column."""
+
+        table_name = self._check_table(table_name)
+
         with self.connection.cursor() as cur:
             # For sparse vectors, use sparsevec type
             vector_column_type = "sparsevec" if self._is_sparse else "vector"
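
The pgvector hunks add a no_restrict escape hatch: on a shared connection, native_query from outside the handler is skipped, while the handler's own calls (such as the new DROP TABLE path in query) pass no_restrict=True. A simplified, self-contained sketch of that gate, not the handler's actual class hierarchy:

    class SharedVectorDB:
        def __init__(self, is_shared_db: bool):
            self._is_shared_db = is_shared_db

        def native_query(self, sql: str, no_restrict: bool = False) -> str:
            # external raw SQL is blocked on a shared connection...
            if self._is_shared_db and not no_restrict:
                return "skipped (shared connection)"
            return f"executed: {sql}"

        def drop_table(self, table_name: str) -> str:
            # ...but the handler's own internal statements opt out of the restriction
            return self.native_query(f"DROP TABLE {table_name}", no_restrict=True)

    db = SharedVectorDB(is_shared_db=True)
    print(db.native_query("SELECT 1"))      # skipped (shared connection)
    print(db.drop_table("my_collection"))   # executed: DROP TABLE my_collection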

mindsdb/integrations/handlers/postgres_handler/postgres_handler.py

@@ -1,5 +1,6 @@
 import time
 import json
+import logging
 from typing import Optional, Any
 
 import pandas as pd
@@ -279,7 +280,7 @@ class PostgresHandler(MetaDatabaseHandler):
         df.columns = columns
 
     @profiler.profile()
-    def native_query(self, query: str, params=None) -> Response:
+    def native_query(self, query: str, params=None, **kwargs) -> Response:
         """
         Executes a SQL query on the PostgreSQL database and returns the result.
 
@@ -304,8 +305,19 @@ class PostgresHandler(MetaDatabaseHandler):
                     result = cur.fetchall()
                     response = _make_table_response(result, cur)
                 connection.commit()
+            except (psycopg.ProgrammingError, psycopg.DataError) as e:
+                # These is 'expected' exceptions, they should not be treated as mindsdb's errors
+                # ProgrammingError: table not found or already exists, syntax error, etc
+                # DataError: division by zero, numeric value out of range, etc.
+                # https://www.psycopg.org/psycopg3/docs/api/errors.html
+                log_message = "Database query failed with error, likely due to invalid SQL query"
+                if logger.isEnabledFor(logging.DEBUG):
+                    log_message += f". Executed query:\n{query}"
+                logger.info(log_message)
+                response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e), is_acceptable_error=True)
+                connection.rollback()
             except Exception as e:
-                logger.error(f"Error running query
+                logger.error(f"Error running query:\n{query}\non {self.database}, {e}")
                 response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
                 connection.rollback()
 
@@ -466,7 +478,10 @@ class PostgresHandler(MetaDatabaseHandler):
             AND
                 table_schema = {schema_name}
         """
-
+        # If it is used by pgvector handler - `native_query` method of pgvector handler will be used
+        # in that case if shared pgvector db is used - `native_query` will be skipped (return empty result)
+        # `no_restrict` flag allows to execute native query, and it will call `native_query` of postgres handler
+        result = self.native_query(query, no_restrict=True)
         result.to_columns_table_response(map_type_fn=_map_type)
         return result
 
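
The Postgres native_query hunk separates "expected" failures (psycopg's ProgrammingError/DataError: bad SQL, missing tables, division by zero) from unexpected ones, logging the former at a lower level and flagging the response as an acceptable error. A self-contained sketch of that classification, using stand-in exception classes instead of psycopg itself:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("query")

    class ProgrammingError(Exception):
        """Stand-in for psycopg.ProgrammingError."""

    class DataError(Exception):
        """Stand-in for psycopg.DataError."""

    def run_query(execute, query: str) -> dict:
        try:
            return {"ok": True, "rows": execute(query)}
        except (ProgrammingError, DataError) as e:
            # user-level SQL problems: log quietly, mark the error as acceptable
            msg = "Database query failed, likely due to invalid SQL query"
            if logger.isEnabledFor(logging.DEBUG):
                msg += f". Executed query:\n{query}"
            logger.info(msg)
            return {"ok": False, "error": str(e), "is_acceptable_error": True}
        except Exception as e:
            # anything else is treated as a real handler error
            logger.error(f"Error running query:\n{query}\n{e}")
            return {"ok": False, "error": str(e), "is_acceptable_error": False}

    def bad_sql(_query):
        raise ProgrammingError('relation "missing_table" does not exist')

    print(run_query(bad_sql, "SELECT * FROM missing_table"))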

mindsdb/integrations/handlers/shopify_handler/shopify_handler.py

@@ -1,7 +1,17 @@
 import shopify
 import requests
 
-from mindsdb.integrations.handlers.shopify_handler.shopify_tables import
+from mindsdb.integrations.handlers.shopify_handler.shopify_tables import (
+    ProductsTable,
+    CustomersTable,
+    OrdersTable,
+    InventoryLevelTable,
+    LocationTable,
+    CustomerReviews,
+    CarrierServiceTable,
+    ShippingZoneTable,
+    SalesChannelTable,
+)
 from mindsdb.integrations.libs.api_handler import APIHandler
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
@@ -9,7 +19,11 @@ from mindsdb.integrations.libs.response import (
 
 from mindsdb.utilities import log
 from mindsdb_sql_parser import parse_sql
-from mindsdb.integrations.libs.api_handler_exceptions import
+from mindsdb.integrations.libs.api_handler_exceptions import (
+    InvalidNativeQuery,
+    ConnectionFailed,
+    MissingConnectionParams,
+)
 
 logger = log.getLogger(__name__)
 
@@ -19,7 +33,7 @@ class ShopifyHandler(APIHandler):
     The Shopify handler implementation.
     """
 
-    name =
+    name = "shopify"
 
     def __init__(self, name: str, **kwargs):
         """
@@ -81,10 +95,12 @@ class ShopifyHandler(APIHandler):
         if self.kwargs.get("connection_data") is None:
             raise MissingConnectionParams("Incomplete parameters passed to Shopify Handler")
 
-        api_session = shopify.Session(self.connection_data[
+        api_session = shopify.Session(self.connection_data["shop_url"], "2021-10", self.connection_data["access_token"])
 
-        self.yotpo_app_key = self.connection_data[
-        self.yotpo_access_token =
+        self.yotpo_app_key = self.connection_data["yotpo_app_key"] if "yotpo_app_key" in self.connection_data else None
+        self.yotpo_access_token = (
+            self.connection_data["yotpo_access_token"] if "yotpo_access_token" in self.connection_data else None
+        )
 
         self.connection = api_session
 
@@ -107,16 +123,13 @@ class ShopifyHandler(APIHandler):
             shopify.Shop.current()
             response.success = True
         except Exception as e:
-            logger.error(
-            raise ConnectionFailed("Conenction to Shopify failed.")
+            logger.error("Error connecting to Shopify!")
             response.error_message = str(e)
+            raise ConnectionFailed("Conenction to Shopify failed.")
 
         if self.yotpo_app_key is not None and self.yotpo_access_token is not None:
             url = f"https://api.yotpo.com/v1/apps/{self.yotpo_app_key}/reviews?count=1&utoken={self.yotpo_access_token}"
-            headers = {
-                "accept": "application/json",
-                "Content-Type": "application/json"
-            }
+            headers = {"accept": "application/json", "Content-Type": "application/json"}
             if requests.get(url, headers=headers).status_code == 200:
                 response.success = True
             else:

mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py

@@ -204,10 +204,11 @@ class SnowflakeHandler(MetaDatabaseHandler):
             "user": self.connection_data.get("user"),
             "password": self.connection_data.get("password"),
             "database": self.connection_data.get("database"),
+            "schema": self.connection_data.get("schema", "PUBLIC"),
         }
 
         # Optional connection parameters
-        optional_params = ["
+        optional_params = ["warehouse", "role"]
         for param in optional_params:
             if param in self.connection_data:
                 config[param] = self.connection_data[param]
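
The Snowflake hunk defaults the connection schema to PUBLIC and keeps warehouse and role optional. A small sketch of assembling such a connect config; the keys mirror the hunk, the sample values are made up:

    def build_config(connection_data: dict) -> dict:
        # required keys, with schema defaulted to PUBLIC
        config = {
            "user": connection_data.get("user"),
            "password": connection_data.get("password"),
            "database": connection_data.get("database"),
            "schema": connection_data.get("schema", "PUBLIC"),
        }
        # optional keys are copied only when present
        for param in ["warehouse", "role"]:
            if param in connection_data:
                config[param] = connection_data[param]
        return config

    print(build_config({"user": "demo", "password": "secret", "database": "DEMO_DB", "role": "ANALYST"}))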

mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py

@@ -170,9 +170,9 @@ def get_all_website_links(url, headers: dict = None) -> dict:
             href = href.rstrip("/")
             urls.add(href)
 
-    except Exception
+    except Exception:
         error_message = traceback.format_exc().splitlines()[-1]
-        logger.
+        logger.exception("An exception occurred:")
         return {
             "url": url,
             "urls": urls,
@@ -238,9 +238,9 @@ def get_all_website_links_recursively(
         if url not in reviewed_urls and matches_filter:
             try:
                 reviewed_urls[url] = get_all_website_links(url, headers=headers)
-            except Exception
+            except Exception:
                 error_message = traceback.format_exc().splitlines()[-1]
-                logger.
+                logger.exception("An exception occurred:")
                 reviewed_urls[url] = {
                     "url": url,
                     "urls": [],
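
The web crawler hunks above, like the MetaAPIHandler hunks that follow, switch the truncated logger calls to logger.exception(...), which logs at ERROR level and appends the active traceback automatically when used inside an except block. A quick illustration:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("crawler")

    try:
        1 / 0
    except Exception:
        # message plus full traceback; logger.error would need exc_info=True for the same
        logger.exception("An exception occurred:")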

mindsdb/integrations/libs/api_handler.py

@@ -550,8 +550,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_tables"):
                     table_metadata = table_class.meta_get_tables(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame([table_metadata])], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving metadata for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -572,8 +572,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_columns"):
                     column_metadata = table_class.meta_get_columns(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(column_metadata)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving column metadata for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -594,8 +594,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_column_statistics"):
                     column_statistics = table_class.meta_get_column_statistics(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(column_statistics)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving column statistics for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -616,8 +616,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_primary_keys"):
                     primary_key_metadata = table_class.meta_get_primary_keys(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(primary_key_metadata)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving primary keys for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -641,8 +641,8 @@ class MetaAPIHandler(APIHandler):
                         table_name, all_tables=table_names if table_names else all_tables, **kwargs
                     )
                     df = pd.concat([df, pd.DataFrame(foreign_key_metadata)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving foreign keys for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 