MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +40 -29
- mindsdb/api/a2a/__init__.py +1 -1
- mindsdb/api/a2a/agent.py +16 -10
- mindsdb/api/a2a/common/server/server.py +7 -3
- mindsdb/api/a2a/common/server/task_manager.py +12 -5
- mindsdb/api/a2a/common/types.py +66 -0
- mindsdb/api/a2a/task_manager.py +65 -17
- mindsdb/api/common/middleware.py +10 -12
- mindsdb/api/executor/command_executor.py +51 -40
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/planner/query_prepare.py +2 -20
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +37 -20
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +75 -61
- mindsdb/api/http/namespaces/agents.py +10 -15
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/chatbots.py +0 -5
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +17 -4
- mindsdb/api/http/namespaces/handlers.py +17 -7
- mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +16 -10
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
- mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
- mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
- mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
- mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
- mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
- mindsdb/integrations/handlers/shopify_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +80 -13
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -3
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
- mindsdb/interfaces/database/data_handlers_cache.py +190 -0
- mindsdb/interfaces/database/database.py +3 -3
- mindsdb/interfaces/database/integrations.py +7 -110
- mindsdb/interfaces/database/projects.py +2 -6
- mindsdb/interfaces/database/views.py +1 -4
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -9
- mindsdb/interfaces/jobs/scheduler.py +3 -9
- mindsdb/interfaces/knowledge_base/controller.py +244 -128
- mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
- mindsdb/interfaces/knowledge_base/executor.py +11 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +172 -168
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/skills_controller.py +1 -4
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/db.py +16 -6
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -52
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +68 -2
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/json_encoder.py +24 -10
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +22 -20
- mindsdb/utilities/starters.py +0 -10
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/METADATA +286 -267
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/RECORD +145 -159
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- mindsdb/api/postgres/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
- mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
- mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
- mindsdb/api/postgres/start.py +0 -11
- mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
- mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/top_level.txt +0 -0
|
@@ -27,51 +27,40 @@ logger = log.getLogger(__name__)
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def create_learn_mark():
|
|
30
|
-
if os.name ==
|
|
31
|
-
p = Path(tempfile.gettempdir()).joinpath(
|
|
30
|
+
if os.name == "posix":
|
|
31
|
+
p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/")
|
|
32
32
|
p.mkdir(parents=True, exist_ok=True)
|
|
33
|
-
p.joinpath(f
|
|
33
|
+
p.joinpath(f"{os.getpid()}").touch()
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def delete_learn_mark():
|
|
37
|
-
if os.name ==
|
|
38
|
-
p = (
|
|
39
|
-
Path(tempfile.gettempdir())
|
|
40
|
-
.joinpath('mindsdb/learn_processes/')
|
|
41
|
-
.joinpath(f'{os.getpid()}')
|
|
42
|
-
)
|
|
37
|
+
if os.name == "posix":
|
|
38
|
+
p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/").joinpath(f"{os.getpid()}")
|
|
43
39
|
if p.exists():
|
|
44
40
|
p.unlink()
|
|
45
41
|
|
|
46
42
|
|
|
47
|
-
@mark_process(name=
|
|
43
|
+
@mark_process(name="learn")
|
|
48
44
|
@profiler.profile()
|
|
49
45
|
def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = None):
|
|
46
|
+
model_storage.training_state_set(current_state_num=1, total_states=5, state_name="Generating problem definition")
|
|
47
|
+
json_ai_override = args.pop("using", {})
|
|
50
48
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
)
|
|
54
|
-
json_ai_override = args.pop('using', {})
|
|
55
|
-
|
|
56
|
-
if 'dtype_dict' in json_ai_override:
|
|
57
|
-
args['dtype_dict'] = json_ai_override.pop('dtype_dict')
|
|
49
|
+
if "dtype_dict" in json_ai_override:
|
|
50
|
+
args["dtype_dict"] = json_ai_override.pop("dtype_dict")
|
|
58
51
|
|
|
59
|
-
if
|
|
60
|
-
args = {**args, **json_ai_override[
|
|
52
|
+
if "problem_definition" in json_ai_override:
|
|
53
|
+
args = {**args, **json_ai_override["problem_definition"]}
|
|
61
54
|
|
|
62
|
-
if
|
|
63
|
-
for tss_key in [
|
|
64
|
-
|
|
65
|
-
]:
|
|
66
|
-
k = f'timeseries_settings.{tss_key}'
|
|
55
|
+
if "timeseries_settings" in args:
|
|
56
|
+
for tss_key in [f.name for f in dataclasses.fields(lightwood.api.TimeseriesSettings)]:
|
|
57
|
+
k = f"timeseries_settings.{tss_key}"
|
|
67
58
|
if k in json_ai_override:
|
|
68
|
-
args[
|
|
59
|
+
args["timeseries_settings"][tss_key] = json_ai_override.pop(k)
|
|
69
60
|
|
|
70
61
|
problem_definition = lightwood.ProblemDefinition.from_dict(args)
|
|
71
62
|
|
|
72
|
-
model_storage.training_state_set(
|
|
73
|
-
current_state_num=2, total_states=5, state_name='Generating JsonAI'
|
|
74
|
-
)
|
|
63
|
+
model_storage.training_state_set(current_state_num=2, total_states=5, state_name="Generating JsonAI")
|
|
75
64
|
json_ai = lightwood.json_ai_from_problem(df, problem_definition)
|
|
76
65
|
json_ai = json_ai.to_dict()
|
|
77
66
|
unpack_jsonai_old_args(json_ai_override)
|
|
@@ -79,9 +68,7 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
|
|
|
79
68
|
rep_recur(json_ai, json_ai_override)
|
|
80
69
|
json_ai = JsonAI.from_dict(json_ai)
|
|
81
70
|
|
|
82
|
-
model_storage.training_state_set(
|
|
83
|
-
current_state_num=3, total_states=5, state_name='Generating code'
|
|
84
|
-
)
|
|
71
|
+
model_storage.training_state_set(current_state_num=3, total_states=5, state_name="Generating code")
|
|
85
72
|
code = lightwood.code_from_json_ai(json_ai)
|
|
86
73
|
|
|
87
74
|
predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
|
|
@@ -89,33 +76,27 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
|
|
|
89
76
|
db.session.commit()
|
|
90
77
|
|
|
91
78
|
json_storage = get_json_storage(resource_id=predictor_id)
|
|
92
|
-
json_storage.set(
|
|
79
|
+
json_storage.set("json_ai", json_ai.to_dict())
|
|
93
80
|
|
|
94
81
|
|
|
95
|
-
@mark_process(name=
|
|
82
|
+
@mark_process(name="learn")
|
|
96
83
|
@profiler.profile()
|
|
97
84
|
def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
|
|
98
85
|
try:
|
|
99
86
|
predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
|
|
100
87
|
assert predictor_record is not None
|
|
101
88
|
|
|
102
|
-
predictor_record.data = {
|
|
89
|
+
predictor_record.data = {"training_log": "training"}
|
|
103
90
|
predictor_record.status = PREDICTOR_STATUS.TRAINING
|
|
104
91
|
db.session.commit()
|
|
105
92
|
|
|
106
|
-
model_storage.training_state_set(
|
|
107
|
-
|
|
108
|
-
)
|
|
109
|
-
predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(
|
|
110
|
-
predictor_record.code
|
|
111
|
-
)
|
|
93
|
+
model_storage.training_state_set(current_state_num=4, total_states=5, state_name="Training model")
|
|
94
|
+
predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
|
|
112
95
|
predictor.learn(df)
|
|
113
96
|
|
|
114
97
|
db.session.refresh(predictor_record)
|
|
115
98
|
|
|
116
|
-
fs = FileStorage(
|
|
117
|
-
resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
|
|
118
|
-
)
|
|
99
|
+
fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
|
|
119
100
|
predictor.save(fs.folder_path / fs.folder_name)
|
|
120
101
|
fs.push(compression_level=0)
|
|
121
102
|
|
|
@@ -124,9 +105,7 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
|
|
|
124
105
|
# getting training time for each tried model. it is possible to do
|
|
125
106
|
# after training only
|
|
126
107
|
fit_mixers = list(
|
|
127
|
-
predictor.runtime_log[x]
|
|
128
|
-
for x in predictor.runtime_log
|
|
129
|
-
if isinstance(x, tuple) and x[0] == "fit_mixer"
|
|
108
|
+
predictor.runtime_log[x] for x in predictor.runtime_log if isinstance(x, tuple) and x[0] == "fit_mixer"
|
|
130
109
|
)
|
|
131
110
|
submodel_data = predictor_record.data.get("submodel_data", [])
|
|
132
111
|
# add training time to other mixers info
|
|
@@ -135,43 +114,39 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
|
|
|
135
114
|
submodel_data[i]["training_time"] = tr_time
|
|
136
115
|
predictor_record.data["submodel_data"] = submodel_data
|
|
137
116
|
|
|
138
|
-
model_storage.training_state_set(
|
|
139
|
-
current_state_num=5, total_states=5, state_name='Complete'
|
|
140
|
-
)
|
|
117
|
+
model_storage.training_state_set(current_state_num=5, total_states=5, state_name="Complete")
|
|
141
118
|
predictor_record.dtype_dict = predictor.dtype_dict
|
|
142
119
|
db.session.commit()
|
|
143
120
|
except Exception as e:
|
|
144
121
|
db.session.refresh(predictor_record)
|
|
145
|
-
predictor_record.data = {
|
|
122
|
+
predictor_record.data = {"error": f"{traceback.format_exc()}\nMain error: {e}"}
|
|
146
123
|
db.session.commit()
|
|
147
124
|
raise e
|
|
148
125
|
|
|
149
126
|
|
|
150
|
-
@mark_process(name=
|
|
127
|
+
@mark_process(name="learn")
|
|
151
128
|
def run_learn_remote(df: DataFrame, predictor_id: int) -> None:
|
|
152
129
|
try:
|
|
153
130
|
serialized_df = json.dumps(df.to_dict())
|
|
154
131
|
predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
|
|
155
132
|
resp = requests.post(
|
|
156
|
-
predictor_record.data[
|
|
157
|
-
json={
|
|
133
|
+
predictor_record.data["train_url"],
|
|
134
|
+
json={"df": serialized_df, "target": predictor_record.to_predict[0]},
|
|
158
135
|
)
|
|
159
136
|
|
|
160
137
|
assert resp.status_code == 200
|
|
161
|
-
predictor_record.data[
|
|
138
|
+
predictor_record.data["status"] = "complete"
|
|
162
139
|
except Exception:
|
|
163
|
-
predictor_record.data[
|
|
164
|
-
predictor_record.data[
|
|
140
|
+
predictor_record.data["status"] = "error"
|
|
141
|
+
predictor_record.data["error"] = str(resp.text)
|
|
165
142
|
|
|
166
143
|
db.session.commit()
|
|
167
144
|
|
|
168
145
|
|
|
169
|
-
@mark_process(name=
|
|
146
|
+
@mark_process(name="learn")
|
|
170
147
|
def run_learn(df: DataFrame, args: dict, model_storage) -> None:
|
|
171
148
|
if df is None or df.shape[0] == 0:
|
|
172
|
-
raise Exception(
|
|
173
|
-
'No input data. Ensure the data source is healthy and try again.'
|
|
174
|
-
)
|
|
149
|
+
raise Exception("No input data. Ensure the data source is healthy and try again.")
|
|
175
150
|
|
|
176
151
|
predictor_id = model_storage.predictor_id
|
|
177
152
|
|
|
@@ -187,15 +162,13 @@ def run_learn(df: DataFrame, args: dict, model_storage) -> None:
|
|
|
187
162
|
db.session.commit()
|
|
188
163
|
|
|
189
164
|
|
|
190
|
-
@mark_process(name=
|
|
165
|
+
@mark_process(name="finetune")
|
|
191
166
|
def run_finetune(df: DataFrame, args: dict, model_storage):
|
|
192
167
|
try:
|
|
193
168
|
if df is None or df.shape[0] == 0:
|
|
194
|
-
raise Exception(
|
|
195
|
-
'No input data. Ensure the data source is healthy and try again.'
|
|
196
|
-
)
|
|
169
|
+
raise Exception("No input data. Ensure the data source is healthy and try again.")
|
|
197
170
|
|
|
198
|
-
base_predictor_id = args[
|
|
171
|
+
base_predictor_id = args["base_model_id"]
|
|
199
172
|
base_predictor_record = db.Predictor.query.get(base_predictor_id)
|
|
200
173
|
if base_predictor_record.status != PREDICTOR_STATUS.COMPLETE:
|
|
201
174
|
raise Exception("Base model must be in status 'complete'")
|
|
@@ -204,11 +177,9 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
|
|
|
204
177
|
predictor_record = db.Predictor.query.get(predictor_id)
|
|
205
178
|
|
|
206
179
|
# TODO move this to ModelStorage (don't work with database directly)
|
|
207
|
-
predictor_record.data = {
|
|
180
|
+
predictor_record.data = {"training_log": "training"}
|
|
208
181
|
predictor_record.training_start_at = datetime.now()
|
|
209
|
-
predictor_record.status =
|
|
210
|
-
PREDICTOR_STATUS.FINETUNING
|
|
211
|
-
) # TODO: parallel execution block
|
|
182
|
+
predictor_record.status = PREDICTOR_STATUS.FINETUNING # TODO: parallel execution block
|
|
212
183
|
db.session.commit()
|
|
213
184
|
|
|
214
185
|
base_fs = FileStorage(
|
|
@@ -219,28 +190,23 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
|
|
|
219
190
|
predictor = lightwood.predictor_from_state(
|
|
220
191
|
base_fs.folder_path / base_fs.folder_name, base_predictor_record.code
|
|
221
192
|
)
|
|
222
|
-
predictor.adjust(df, adjust_args=args.get(
|
|
193
|
+
predictor.adjust(df, adjust_args=args.get("using", {}))
|
|
223
194
|
|
|
224
|
-
fs = FileStorage(
|
|
225
|
-
resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
|
|
226
|
-
)
|
|
195
|
+
fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
|
|
227
196
|
predictor.save(fs.folder_path / fs.folder_name)
|
|
228
197
|
fs.push(compression_level=0)
|
|
229
198
|
|
|
230
|
-
predictor_record.data = (
|
|
231
|
-
predictor.model_analysis.to_dict()
|
|
232
|
-
) # todo: update accuracy in LW as post-finetune hook
|
|
199
|
+
predictor_record.data = predictor.model_analysis.to_dict() # todo: update accuracy in LW as post-finetune hook
|
|
233
200
|
predictor_record.code = base_predictor_record.code
|
|
234
|
-
predictor_record.update_status =
|
|
201
|
+
predictor_record.update_status = "up_to_date"
|
|
235
202
|
predictor_record.status = PREDICTOR_STATUS.COMPLETE
|
|
236
203
|
predictor_record.training_stop_at = datetime.now()
|
|
237
204
|
db.session.commit()
|
|
238
205
|
|
|
239
206
|
except Exception as e:
|
|
240
|
-
logger.error(
|
|
207
|
+
logger.error("Unexpected error during Lightwood model finetune:", exc_info=True)
|
|
241
208
|
predictor_id = model_storage.predictor_id
|
|
242
209
|
predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
|
|
243
|
-
logger.error(traceback.format_exc())
|
|
244
210
|
error_message = format_exception_error(e)
|
|
245
211
|
predictor_record.data = {"error": error_message}
|
|
246
212
|
predictor_record.status = PREDICTOR_STATUS.ERROR
|