MindsDB 25.9.1.2__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic.

Files changed (120)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +39 -20
  3. mindsdb/api/a2a/agent.py +7 -9
  4. mindsdb/api/a2a/common/server/server.py +3 -3
  5. mindsdb/api/a2a/common/server/task_manager.py +4 -4
  6. mindsdb/api/a2a/task_manager.py +15 -17
  7. mindsdb/api/common/middleware.py +9 -11
  8. mindsdb/api/executor/command_executor.py +2 -4
  9. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  10. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
  11. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  12. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  13. mindsdb/api/executor/exceptions.py +29 -10
  14. mindsdb/api/executor/planner/plan_join.py +17 -3
  15. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  16. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  18. mindsdb/api/executor/utilities/functions.py +6 -6
  19. mindsdb/api/executor/utilities/sql.py +32 -16
  20. mindsdb/api/http/gui.py +5 -11
  21. mindsdb/api/http/initialize.py +8 -10
  22. mindsdb/api/http/namespaces/agents.py +10 -12
  23. mindsdb/api/http/namespaces/analysis.py +13 -20
  24. mindsdb/api/http/namespaces/auth.py +1 -1
  25. mindsdb/api/http/namespaces/config.py +15 -11
  26. mindsdb/api/http/namespaces/databases.py +140 -201
  27. mindsdb/api/http/namespaces/file.py +15 -4
  28. mindsdb/api/http/namespaces/handlers.py +7 -2
  29. mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
  30. mindsdb/api/http/namespaces/models.py +94 -126
  31. mindsdb/api/http/namespaces/projects.py +13 -22
  32. mindsdb/api/http/namespaces/sql.py +33 -25
  33. mindsdb/api/http/namespaces/tab.py +27 -37
  34. mindsdb/api/http/namespaces/views.py +1 -1
  35. mindsdb/api/http/start.py +14 -8
  36. mindsdb/api/mcp/__init__.py +2 -1
  37. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  38. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  39. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  40. mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
  41. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
  42. mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
  43. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +11 -5
  44. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  45. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  46. mindsdb/integrations/handlers/openai_handler/openai_handler.py +1 -1
  47. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +20 -2
  48. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +18 -3
  49. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  50. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  51. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  52. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  53. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  54. mindsdb/integrations/libs/api_handler.py +10 -10
  55. mindsdb/integrations/libs/base.py +4 -4
  56. mindsdb/integrations/libs/llm/utils.py +2 -2
  57. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  58. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  59. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  60. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  61. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  62. mindsdb/integrations/libs/process_cache.py +132 -140
  63. mindsdb/integrations/libs/response.py +18 -12
  64. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  65. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  66. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  67. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
  68. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  69. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  70. mindsdb/integrations/utilities/rag/settings.py +58 -133
  71. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  72. mindsdb/interfaces/agents/agents_controller.py +2 -1
  73. mindsdb/interfaces/agents/constants.py +0 -2
  74. mindsdb/interfaces/agents/litellm_server.py +34 -58
  75. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  76. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  77. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  78. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  79. mindsdb/interfaces/chatbot/polling.py +30 -18
  80. mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
  81. mindsdb/interfaces/database/integrations.py +19 -2
  82. mindsdb/interfaces/file/file_controller.py +6 -6
  83. mindsdb/interfaces/functions/controller.py +1 -1
  84. mindsdb/interfaces/functions/to_markdown.py +2 -2
  85. mindsdb/interfaces/jobs/jobs_controller.py +5 -5
  86. mindsdb/interfaces/jobs/scheduler.py +3 -8
  87. mindsdb/interfaces/knowledge_base/controller.py +54 -25
  88. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  89. mindsdb/interfaces/model/model_controller.py +170 -166
  90. mindsdb/interfaces/query_context/context_controller.py +14 -2
  91. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
  92. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  93. mindsdb/interfaces/skills/skill_tool.py +2 -2
  94. mindsdb/interfaces/skills/sql_agent.py +25 -19
  95. mindsdb/interfaces/storage/fs.py +114 -169
  96. mindsdb/interfaces/storage/json.py +19 -18
  97. mindsdb/interfaces/storage/model_fs.py +54 -92
  98. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  99. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  100. mindsdb/interfaces/tasks/task_thread.py +7 -9
  101. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  102. mindsdb/interfaces/triggers/triggers_controller.py +47 -50
  103. mindsdb/migrations/migrate.py +16 -16
  104. mindsdb/utilities/api_status.py +58 -0
  105. mindsdb/utilities/config.py +49 -0
  106. mindsdb/utilities/exception.py +40 -1
  107. mindsdb/utilities/fs.py +0 -1
  108. mindsdb/utilities/hooks/profiling.py +17 -14
  109. mindsdb/utilities/langfuse.py +40 -45
  110. mindsdb/utilities/log.py +272 -0
  111. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  112. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  113. mindsdb/utilities/render/sqlalchemy_render.py +8 -7
  114. mindsdb/utilities/utils.py +2 -2
  115. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +266 -261
  116. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +119 -119
  117. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  118. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
  119. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
  120. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
File: mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py

@@ -17,7 +17,6 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
     TableField,
     VectorStoreHandler,
 )
-from mindsdb.interfaces.storage.model_fs import HandlerStorage
 from mindsdb.utilities import log

 logger = log.getLogger(__name__)
@@ -55,7 +54,7 @@ class ChromaDBHandler(VectorStoreHandler):

     def __init__(self, name: str, **kwargs):
         super().__init__(name)
-        self.handler_storage = HandlerStorage(kwargs.get("integration_id"))
+        self.handler_storage = kwargs["handler_storage"]
         self._client = None
         self.persist_directory = None
         self.is_connected = False
@@ -73,8 +72,6 @@ class ChromaDBHandler(VectorStoreHandler):
             "hnsw:space": config.distance,
         }

-        self.connect()
-
     def validate_connection_parameters(self, name, **kwargs):
         """
         Validate the connection parameters.
@@ -88,7 +85,7 @@ class ChromaDBHandler(VectorStoreHandler):
         if config.persist_directory:
             if os.path.isabs(config.persist_directory):
                 self.persist_directory = config.persist_directory
-            elif not self.handler_storage.is_temporal:
+            else:
                 # get full persistence directory from handler storage
                 self.persist_directory = self.handler_storage.folder_get(config.persist_directory)
                 self._use_handler_storage = True
@@ -149,6 +146,7 @@ class ChromaDBHandler(VectorStoreHandler):
         need_to_close = self.is_connected is False

         try:
+            self.connect()
             self._client.heartbeat()
             response_code.success = True
         except Exception as e:
@@ -233,6 +231,7 @@ class ChromaDBHandler(VectorStoreHandler):
         offset: int = None,
         limit: int = None,
     ) -> pd.DataFrame:
+        self.connect()
         collection = self._client.get_collection(table_name)
         filters = self._translate_metadata_condition(conditions)

@@ -399,6 +398,7 @@ class ChromaDBHandler(VectorStoreHandler):
         Insert/Upsert data into ChromaDB collection.
         If records with same IDs exist, they will be updated.
         """
+        self.connect()
         collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata)

         # Convert metadata from string to dict if needed
@@ -449,6 +449,7 @@ class ChromaDBHandler(VectorStoreHandler):
         """
         Update data in the ChromaDB database.
         """
+        self.connect()
         collection = self._client.get_collection(table_name)

         # drop columns with all None values
@@ -466,6 +467,7 @@ class ChromaDBHandler(VectorStoreHandler):
         self._sync()

     def delete(self, table_name: str, conditions: List[FilterCondition] = None):
+        self.connect()
         filters = self._translate_metadata_condition(conditions)
         # get id filters
         id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] or None
@@ -480,6 +482,7 @@ class ChromaDBHandler(VectorStoreHandler):
         """
         Create a collection with the given name in the ChromaDB database.
         """
+        self.connect()
         self._client.create_collection(
             table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata
         )
@@ -489,6 +492,7 @@ class ChromaDBHandler(VectorStoreHandler):
         """
         Delete a collection from the ChromaDB database.
         """
+        self.connect()
         try:
             self._client.delete_collection(table_name)
             self._sync()
@@ -502,6 +506,7 @@ class ChromaDBHandler(VectorStoreHandler):
         """
         Get the list of collections in the ChromaDB database.
         """
+        self.connect()
         collections = self._client.list_collections()
         collections_name = pd.DataFrame(
             columns=["table_name"],
@@ -511,6 +516,7 @@ class ChromaDBHandler(VectorStoreHandler):

     def get_columns(self, table_name: str) -> HandlerResponse:
         # check if collection exists
+        self.connect()
         try:
             _ = self._client.get_collection(table_name)
         except ValueError:
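
Note: the ChromaDB hunks above drop the eager self.connect() call from __init__ and instead call self.connect() at the top of every operation (check_connection, select, insert, update, delete and the collection methods), so a connection is only opened when it is actually needed. A minimal sketch of this lazy-connect pattern; the class below is illustrative and assumes the public chromadb PersistentClient API, it is not MindsDB's actual handler:

    import chromadb


    class LazyChromaHandler:
        """Open the client only when an operation needs it."""

        def __init__(self, persist_directory: str):
            self.persist_directory = persist_directory
            self._client = None  # nothing is connected at construction time

        def connect(self):
            # idempotent: reuse the client if one already exists
            if self._client is None:
                self._client = chromadb.PersistentClient(path=self.persist_directory)
            return self._client

        def get_tables(self) -> list:
            self.connect()  # every public operation ensures the connection first
            return [collection.name for collection in self._client.list_collections()]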
File: mindsdb/integrations/handlers/file_handler/file_handler.py

@@ -50,6 +50,7 @@ class FileHandler(DatabaseHandler):
         self.chunk_size = connection_data.get("chunk_size", DEFAULT_CHUNK_SIZE)
         self.chunk_overlap = connection_data.get("chunk_overlap", DEFAULT_CHUNK_OVERLAP)
         self.file_controller = file_controller
+        self.thread_safe = True

     def connect(self, **kwargs):
         return
@@ -83,6 +84,12 @@ class FileHandler(DatabaseHandler):
         table_name = table_identifier.parts[-1]
         try:
             self.file_controller.delete_file(table_name)
+        except FileNotFoundError as e:
+            if not query.if_exists:
+                return Response(
+                    RESPONSE_TYPE.ERROR,
+                    error_message=f"Can't delete table '{table_name}': {e}",
+                )
         except Exception as e:
             return Response(
                 RESPONSE_TYPE.ERROR,
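
Note: the second FileHandler hunk adds a dedicated FileNotFoundError branch so that DROP TABLE ... IF EXISTS succeeds silently when the underlying file is already gone, while a plain DROP TABLE still returns an error. A rough sketch of that control flow, with a plain dict standing in for the handler's Response objects:

    def drop_table(file_controller, table_name: str, if_exists: bool) -> dict:
        try:
            file_controller.delete_file(table_name)
        except FileNotFoundError as e:
            # IF EXISTS: a missing file is not an error; otherwise report it
            if not if_exists:
                return {"type": "error", "message": f"Can't delete table '{table_name}': {e}"}
        return {"type": "ok"}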
File: mindsdb/integrations/handlers/lightwood_handler/functions.py

@@ -27,51 +27,40 @@ logger = log.getLogger(__name__)


 def create_learn_mark():
-    if os.name == 'posix':
-        p = Path(tempfile.gettempdir()).joinpath('mindsdb/learn_processes/')
+    if os.name == "posix":
+        p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/")
         p.mkdir(parents=True, exist_ok=True)
-        p.joinpath(f'{os.getpid()}').touch()
+        p.joinpath(f"{os.getpid()}").touch()


 def delete_learn_mark():
-    if os.name == 'posix':
-        p = (
-            Path(tempfile.gettempdir())
-            .joinpath('mindsdb/learn_processes/')
-            .joinpath(f'{os.getpid()}')
-        )
+    if os.name == "posix":
+        p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/").joinpath(f"{os.getpid()}")
         if p.exists():
             p.unlink()


-@mark_process(name='learn')
+@mark_process(name="learn")
 @profiler.profile()
 def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = None):
+    model_storage.training_state_set(current_state_num=1, total_states=5, state_name="Generating problem definition")
+    json_ai_override = args.pop("using", {})

-    model_storage.training_state_set(
-        current_state_num=1, total_states=5, state_name='Generating problem definition'
-    )
-    json_ai_override = args.pop('using', {})
-
-    if 'dtype_dict' in json_ai_override:
-        args['dtype_dict'] = json_ai_override.pop('dtype_dict')
+    if "dtype_dict" in json_ai_override:
+        args["dtype_dict"] = json_ai_override.pop("dtype_dict")

-    if 'problem_definition' in json_ai_override:
-        args = {**args, **json_ai_override['problem_definition']}
+    if "problem_definition" in json_ai_override:
+        args = {**args, **json_ai_override["problem_definition"]}

-    if 'timeseries_settings' in args:
-        for tss_key in [
-            f.name for f in dataclasses.fields(lightwood.api.TimeseriesSettings)
-        ]:
-            k = f'timeseries_settings.{tss_key}'
+    if "timeseries_settings" in args:
+        for tss_key in [f.name for f in dataclasses.fields(lightwood.api.TimeseriesSettings)]:
+            k = f"timeseries_settings.{tss_key}"
             if k in json_ai_override:
-                args['timeseries_settings'][tss_key] = json_ai_override.pop(k)
+                args["timeseries_settings"][tss_key] = json_ai_override.pop(k)

     problem_definition = lightwood.ProblemDefinition.from_dict(args)

-    model_storage.training_state_set(
-        current_state_num=2, total_states=5, state_name='Generating JsonAI'
-    )
+    model_storage.training_state_set(current_state_num=2, total_states=5, state_name="Generating JsonAI")
     json_ai = lightwood.json_ai_from_problem(df, problem_definition)
     json_ai = json_ai.to_dict()
     unpack_jsonai_old_args(json_ai_override)
@@ -79,9 +68,7 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
     rep_recur(json_ai, json_ai_override)
     json_ai = JsonAI.from_dict(json_ai)

-    model_storage.training_state_set(
-        current_state_num=3, total_states=5, state_name='Generating code'
-    )
+    model_storage.training_state_set(current_state_num=3, total_states=5, state_name="Generating code")
     code = lightwood.code_from_json_ai(json_ai)

     predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
@@ -89,33 +76,27 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
     db.session.commit()

     json_storage = get_json_storage(resource_id=predictor_id)
-    json_storage.set('json_ai', json_ai.to_dict())
+    json_storage.set("json_ai", json_ai.to_dict())


-@mark_process(name='learn')
+@mark_process(name="learn")
 @profiler.profile()
 def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
     try:
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
         assert predictor_record is not None

-        predictor_record.data = {'training_log': 'training'}
+        predictor_record.data = {"training_log": "training"}
         predictor_record.status = PREDICTOR_STATUS.TRAINING
         db.session.commit()

-        model_storage.training_state_set(
-            current_state_num=4, total_states=5, state_name='Training model'
-        )
-        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(
-            predictor_record.code
-        )
+        model_storage.training_state_set(current_state_num=4, total_states=5, state_name="Training model")
+        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
         predictor.learn(df)

         db.session.refresh(predictor_record)

-        fs = FileStorage(
-            resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
-        )
+        fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
         predictor.save(fs.folder_path / fs.folder_name)
         fs.push(compression_level=0)

@@ -124,9 +105,7 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
         # getting training time for each tried model. it is possible to do
         # after training only
         fit_mixers = list(
-            predictor.runtime_log[x]
-            for x in predictor.runtime_log
-            if isinstance(x, tuple) and x[0] == "fit_mixer"
+            predictor.runtime_log[x] for x in predictor.runtime_log if isinstance(x, tuple) and x[0] == "fit_mixer"
         )
         submodel_data = predictor_record.data.get("submodel_data", [])
         # add training time to other mixers info
@@ -135,43 +114,39 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
             submodel_data[i]["training_time"] = tr_time
         predictor_record.data["submodel_data"] = submodel_data

-        model_storage.training_state_set(
-            current_state_num=5, total_states=5, state_name='Complete'
-        )
+        model_storage.training_state_set(current_state_num=5, total_states=5, state_name="Complete")
         predictor_record.dtype_dict = predictor.dtype_dict
         db.session.commit()
     except Exception as e:
         db.session.refresh(predictor_record)
-        predictor_record.data = {'error': f'{traceback.format_exc()}\nMain error: {e}'}
+        predictor_record.data = {"error": f"{traceback.format_exc()}\nMain error: {e}"}
         db.session.commit()
         raise e


-@mark_process(name='learn')
+@mark_process(name="learn")
 def run_learn_remote(df: DataFrame, predictor_id: int) -> None:
     try:
         serialized_df = json.dumps(df.to_dict())
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
         resp = requests.post(
-            predictor_record.data['train_url'],
-            json={'df': serialized_df, 'target': predictor_record.to_predict[0]},
+            predictor_record.data["train_url"],
+            json={"df": serialized_df, "target": predictor_record.to_predict[0]},
         )

         assert resp.status_code == 200
-        predictor_record.data['status'] = 'complete'
+        predictor_record.data["status"] = "complete"
     except Exception:
-        predictor_record.data['status'] = 'error'
-        predictor_record.data['error'] = str(resp.text)
+        predictor_record.data["status"] = "error"
+        predictor_record.data["error"] = str(resp.text)

     db.session.commit()


-@mark_process(name='learn')
+@mark_process(name="learn")
 def run_learn(df: DataFrame, args: dict, model_storage) -> None:
     if df is None or df.shape[0] == 0:
-        raise Exception(
-            'No input data. Ensure the data source is healthy and try again.'
-        )
+        raise Exception("No input data. Ensure the data source is healthy and try again.")

     predictor_id = model_storage.predictor_id

@@ -187,15 +162,13 @@ def run_learn(df: DataFrame, args: dict, model_storage) -> None:
     db.session.commit()


-@mark_process(name='finetune')
+@mark_process(name="finetune")
 def run_finetune(df: DataFrame, args: dict, model_storage):
     try:
         if df is None or df.shape[0] == 0:
-            raise Exception(
-                'No input data. Ensure the data source is healthy and try again.'
-            )
+            raise Exception("No input data. Ensure the data source is healthy and try again.")

-        base_predictor_id = args['base_model_id']
+        base_predictor_id = args["base_model_id"]
         base_predictor_record = db.Predictor.query.get(base_predictor_id)
         if base_predictor_record.status != PREDICTOR_STATUS.COMPLETE:
             raise Exception("Base model must be in status 'complete'")
@@ -204,11 +177,9 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
         predictor_record = db.Predictor.query.get(predictor_id)

         # TODO move this to ModelStorage (don't work with database directly)
-        predictor_record.data = {'training_log': 'training'}
+        predictor_record.data = {"training_log": "training"}
         predictor_record.training_start_at = datetime.now()
-        predictor_record.status = (
-            PREDICTOR_STATUS.FINETUNING
-        )  # TODO: parallel execution block
+        predictor_record.status = PREDICTOR_STATUS.FINETUNING  # TODO: parallel execution block
         db.session.commit()

         base_fs = FileStorage(
@@ -219,28 +190,23 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
         predictor = lightwood.predictor_from_state(
             base_fs.folder_path / base_fs.folder_name, base_predictor_record.code
         )
-        predictor.adjust(df, adjust_args=args.get('using', {}))
+        predictor.adjust(df, adjust_args=args.get("using", {}))

-        fs = FileStorage(
-            resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
-        )
+        fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
         predictor.save(fs.folder_path / fs.folder_name)
         fs.push(compression_level=0)

-        predictor_record.data = (
-            predictor.model_analysis.to_dict()
-        )  # todo: update accuracy in LW as post-finetune hook
+        predictor_record.data = predictor.model_analysis.to_dict()  # todo: update accuracy in LW as post-finetune hook
         predictor_record.code = base_predictor_record.code
-        predictor_record.update_status = 'up_to_date'
+        predictor_record.update_status = "up_to_date"
         predictor_record.status = PREDICTOR_STATUS.COMPLETE
         predictor_record.training_stop_at = datetime.now()
         db.session.commit()

     except Exception as e:
-        logger.error(e)
+        logger.error("Unexpected error during Lightwood model finetune:", exc_info=True)
         predictor_id = model_storage.predictor_id
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
-        logger.error(traceback.format_exc())
         error_message = format_exception_error(e)
         predictor_record.data = {"error": error_message}
         predictor_record.status = PREDICTOR_STATUS.ERROR
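
Note: most of the lightwood_handler/functions.py hunks are quote-style and line-wrapping reformatting; the behavioural change is in run_finetune's error handling, where logger.error(e) followed by logger.error(traceback.format_exc()) is collapsed into a single logger.error(..., exc_info=True). A small standard-library illustration of why the two forms capture the same traceback:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    try:
        1 / 0
    except ZeroDivisionError:
        # exc_info=True attaches the active exception's traceback to the log record
        logger.error("Unexpected error during finetune:", exc_info=True)
        # logger.exception(...) is shorthand for logger.error(..., exc_info=True)
        logger.exception("Unexpected error during finetune:")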
File: mindsdb/integrations/handlers/openai_handler/openai_handler.py

@@ -114,7 +114,7 @@ class OpenAIHandler(BaseMLEngine):
         except NotFoundError:
             pass
         except AuthenticationError as e:
-            if e.body["code"] == "invalid_api_key":
+            if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key":
                 raise Exception("Invalid api key")
             raise Exception(f"Something went wrong: {e}")

File: mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py

@@ -17,7 +17,9 @@ from mindsdb_sql_parser.ast import (
     Delete,
     Update,
     Function,
+    DropTables,
 )
+from mindsdb_sql_parser.ast.base import ASTNode
 from pgvector.psycopg import register_vector

 from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -116,9 +118,22 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
             return Response(RESPONSE_TYPE.OK)
         return super().get_tables()

-    def native_query(self, query, params=None) -> Response:
+    def query(self, query: ASTNode) -> Response:
+        # Option to drop table of shared pgvector connection
+        if isinstance(query, DropTables):
+            query.tables = [self._check_table(table.parts[-1]) for table in query.tables]
+            query_str, params = self.renderer.get_exec_params(query, with_failback=True)
+            return self.native_query(query_str, params, no_restrict=True)
+        return super().query(query)
+
+    def native_query(self, query, params=None, no_restrict=False) -> Response:
+        """
+        Altered `native_query` method of postgres handler.
+        Restrict usage of native query from executor with shared pg vector connection
+        Exceptions: if it is used by pgvector itself (with no_restrict = True)
+        """
         # Prevent execute native queries
-        if self._is_shared_db:
+        if self._is_shared_db and not no_restrict:
             return Response(RESPONSE_TYPE.OK)
         return super().native_query(query, params=params)

@@ -550,6 +565,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):

     def create_table(self, table_name: str):
         """Create a table with a vector column."""
+
+        table_name = self._check_table(table_name)
+
         with self.connection.cursor() as cur:
             # For sparse vectors, use sparsevec type
             vector_column_type = "sparsevec" if self._is_sparse else "vector"
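
Note: together with the matching postgres_handler change below, these hunks let a shared pgvector connection keep rejecting raw SQL coming from the executor while still allowing the handler's own rendered statements (DROP TABLE, metadata queries) through via a no_restrict flag. A condensed sketch of that gating pattern, with simplified signatures rather than the real handler classes:

    class SharedVectorHandler:
        def __init__(self, is_shared_db: bool):
            self._is_shared_db = is_shared_db

        def native_query(self, sql: str, params=None, no_restrict: bool = False) -> dict:
            # Block arbitrary SQL on the shared connection unless the handler
            # itself passed no_restrict=True (e.g. for a rendered DROP TABLE).
            if self._is_shared_db and not no_restrict:
                return {"type": "ok", "skipped": True}
            return self._execute(sql, params)

        def _execute(self, sql: str, params) -> dict:
            raise NotImplementedError  # would run against the real connection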
File: mindsdb/integrations/handlers/postgres_handler/postgres_handler.py

@@ -1,5 +1,6 @@
 import time
 import json
+import logging
 from typing import Optional, Any

 import pandas as pd
@@ -279,7 +280,7 @@ class PostgresHandler(MetaDatabaseHandler):
         df.columns = columns

     @profiler.profile()
-    def native_query(self, query: str, params=None) -> Response:
+    def native_query(self, query: str, params=None, **kwargs) -> Response:
         """
         Executes a SQL query on the PostgreSQL database and returns the result.

@@ -304,8 +305,19 @@ class PostgresHandler(MetaDatabaseHandler):
                     result = cur.fetchall()
                     response = _make_table_response(result, cur)
                 connection.commit()
+        except (psycopg.ProgrammingError, psycopg.DataError) as e:
+            # These is 'expected' exceptions, they should not be treated as mindsdb's errors
+            # ProgrammingError: table not found or already exists, syntax error, etc
+            # DataError: division by zero, numeric value out of range, etc.
+            # https://www.psycopg.org/psycopg3/docs/api/errors.html
+            log_message = "Database query failed with error, likely due to invalid SQL query"
+            if logger.isEnabledFor(logging.DEBUG):
+                log_message += f". Executed query:\n{query}"
+            logger.info(log_message)
+            response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e), is_acceptable_error=True)
+            connection.rollback()
         except Exception as e:
-            logger.error(f"Error running query: {query} on {self.database}, {e}!")
+            logger.error(f"Error running query:\n{query}\non {self.database}, {e}")
             response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
             connection.rollback()

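
Note: the hunk above separates 'expected' database errors from genuinely unexpected failures: psycopg.ProgrammingError and psycopg.DataError (bad SQL, missing tables, division by zero) are logged at INFO, include the query text only when DEBUG logging is enabled, and are flagged is_acceptable_error=True, while everything else keeps the ERROR log. A reduced sketch of that classification, assuming psycopg 3 and a plain dict in place of MindsDB's Response object:

    import logging

    import psycopg

    logger = logging.getLogger(__name__)


    def run_query(connection, query: str) -> dict:
        try:
            with connection.cursor() as cur:
                cur.execute(query)
                rows = cur.fetchall()
            connection.commit()
            return {"type": "table", "rows": rows}
        except (psycopg.ProgrammingError, psycopg.DataError) as e:
            # user/query problems: log quietly and mark the error as acceptable
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("Query failed: %s\nExecuted query:\n%s", e, query)
            connection.rollback()
            return {"type": "error", "message": str(e), "acceptable": True}
        except Exception as e:
            # anything else is unexpected and logged loudly
            logger.error("Error running query:\n%s\n%s", query, e)
            connection.rollback()
            return {"type": "error", "message": str(e)}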
@@ -466,7 +478,10 @@ class PostgresHandler(MetaDatabaseHandler):
             AND
                 table_schema = {schema_name}
         """
-        result = self.native_query(query)
+        # If it is used by pgvector handler - `native_query` method of pgvector handler will be used
+        # in that case if shared pgvector db is used - `native_query` will be skipped (return empty result)
+        # `no_restrict` flag allows to execute native query, and it will call `native_query` of postgres handler
+        result = self.native_query(query, no_restrict=True)
         result.to_columns_table_response(map_type_fn=_map_type)
         return result

File: mindsdb/integrations/handlers/shopify_handler/shopify_handler.py

@@ -1,7 +1,17 @@
 import shopify
 import requests

-from mindsdb.integrations.handlers.shopify_handler.shopify_tables import ProductsTable, CustomersTable, OrdersTable, InventoryLevelTable, LocationTable, CustomerReviews, CarrierServiceTable, ShippingZoneTable, SalesChannelTable
+from mindsdb.integrations.handlers.shopify_handler.shopify_tables import (
+    ProductsTable,
+    CustomersTable,
+    OrdersTable,
+    InventoryLevelTable,
+    LocationTable,
+    CustomerReviews,
+    CarrierServiceTable,
+    ShippingZoneTable,
+    SalesChannelTable,
+)
 from mindsdb.integrations.libs.api_handler import APIHandler
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
@@ -9,7 +19,11 @@ from mindsdb.integrations.libs.response import (

 from mindsdb.utilities import log
 from mindsdb_sql_parser import parse_sql
-from mindsdb.integrations.libs.api_handler_exceptions import InvalidNativeQuery, ConnectionFailed, MissingConnectionParams
+from mindsdb.integrations.libs.api_handler_exceptions import (
+    InvalidNativeQuery,
+    ConnectionFailed,
+    MissingConnectionParams,
+)

 logger = log.getLogger(__name__)

@@ -19,7 +33,7 @@ class ShopifyHandler(APIHandler):
     The Shopify handler implementation.
     """

-    name = 'shopify'
+    name = "shopify"

     def __init__(self, name: str, **kwargs):
         """
@@ -81,10 +95,12 @@ class ShopifyHandler(APIHandler):
         if self.kwargs.get("connection_data") is None:
             raise MissingConnectionParams("Incomplete parameters passed to Shopify Handler")

-        api_session = shopify.Session(self.connection_data['shop_url'], '2021-10', self.connection_data['access_token'])
+        api_session = shopify.Session(self.connection_data["shop_url"], "2021-10", self.connection_data["access_token"])

-        self.yotpo_app_key = self.connection_data['yotpo_app_key'] if 'yotpo_app_key' in self.connection_data else None
-        self.yotpo_access_token = self.connection_data['yotpo_access_token'] if 'yotpo_access_token' in self.connection_data else None
+        self.yotpo_app_key = self.connection_data["yotpo_app_key"] if "yotpo_app_key" in self.connection_data else None
+        self.yotpo_access_token = (
+            self.connection_data["yotpo_access_token"] if "yotpo_access_token" in self.connection_data else None
+        )

         self.connection = api_session

@@ -107,16 +123,13 @@ class ShopifyHandler(APIHandler):
             shopify.Shop.current()
             response.success = True
         except Exception as e:
-            logger.error('Error connecting to Shopify!')
-            raise ConnectionFailed("Conenction to Shopify failed.")
+            logger.error("Error connecting to Shopify!")
             response.error_message = str(e)
+            raise ConnectionFailed("Conenction to Shopify failed.")

         if self.yotpo_app_key is not None and self.yotpo_access_token is not None:
             url = f"https://api.yotpo.com/v1/apps/{self.yotpo_app_key}/reviews?count=1&utoken={self.yotpo_access_token}"
-            headers = {
-                "accept": "application/json",
-                "Content-Type": "application/json"
-            }
+            headers = {"accept": "application/json", "Content-Type": "application/json"}
             if requests.get(url, headers=headers).status_code == 200:
                 response.success = True
             else:
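
Note: in the check_connection hunk above, response.error_message = str(e) now runs before raise ConnectionFailed(...); previously the raise came first, so the assignment was unreachable and the response never carried the failure details. A tiny sketch of the corrected ordering (the client call and class names are illustrative, not the Shopify SDK's API):

    class ConnectionFailed(Exception):
        pass


    def check_connection(client, response):
        try:
            client.current_shop()  # any call that verifies credentials
            response.success = True
        except Exception as e:
            # record the failure details before raising; anything placed
            # after the raise would never execute
            response.error_message = str(e)
            raise ConnectionFailed("Connection to Shopify failed.") from e
        return response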
File: mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py

@@ -204,10 +204,11 @@ class SnowflakeHandler(MetaDatabaseHandler):
             "user": self.connection_data.get("user"),
             "password": self.connection_data.get("password"),
             "database": self.connection_data.get("database"),
+            "schema": self.connection_data.get("schema", "PUBLIC"),
         }

         # Optional connection parameters
-        optional_params = ["schema", "warehouse", "role"]
+        optional_params = ["warehouse", "role"]
         for param in optional_params:
             if param in self.connection_data:
                 config[param] = self.connection_data[param]
File: mindsdb/integrations/handlers/statsforecast_handler/requirements.txt

@@ -1,2 +1,3 @@
 statsforecast==1.6.0
 scipy==1.15.3
+numba >=0.55.0, <=0.61.2
File: mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt

@@ -1,2 +1,3 @@
 statsforecast==1.6.0
 scipy==1.15.3
+numba >=0.55.0, <=0.61.2
File: mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py

@@ -170,9 +170,9 @@ def get_all_website_links(url, headers: dict = None) -> dict:
             href = href.rstrip("/")
             urls.add(href)

-    except Exception as e:
+    except Exception:
         error_message = traceback.format_exc().splitlines()[-1]
-        logger.error("An exception occurred: %s", str(e))
+        logger.exception("An exception occurred:")
         return {
             "url": url,
             "urls": urls,
@@ -238,9 +238,9 @@ def get_all_website_links_recursively(
     if url not in reviewed_urls and matches_filter:
         try:
             reviewed_urls[url] = get_all_website_links(url, headers=headers)
-        except Exception as e:
+        except Exception:
             error_message = traceback.format_exc().splitlines()[-1]
-            logger.error("An exception occurred: %s", str(e))
+            logger.exception("An exception occurred:")
             reviewed_urls[url] = {
                 "url": url,
                 "urls": [],
File: mindsdb/integrations/libs/api_handler.py

@@ -550,8 +550,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_tables"):
                     table_metadata = table_class.meta_get_tables(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame([table_metadata])], ignore_index=True)
-            except Exception as e:
-                logger.error(f"Error retrieving metadata for table {table_name}: {e}")
+            except Exception:
+                logger.exception(f"Error retrieving metadata for table {table_name}:")

         return Response(RESPONSE_TYPE.TABLE, df)

@@ -572,8 +572,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_columns"):
                     column_metadata = table_class.meta_get_columns(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(column_metadata)], ignore_index=True)
-            except Exception as e:
-                logger.error(f"Error retrieving column metadata for table {table_name}: {e}")
+            except Exception:
+                logger.exception(f"Error retrieving column metadata for table {table_name}:")

         return Response(RESPONSE_TYPE.TABLE, df)

@@ -594,8 +594,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_column_statistics"):
                     column_statistics = table_class.meta_get_column_statistics(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(column_statistics)], ignore_index=True)
-            except Exception as e:
-                logger.error(f"Error retrieving column statistics for table {table_name}: {e}")
+            except Exception:
+                logger.exception(f"Error retrieving column statistics for table {table_name}:")

         return Response(RESPONSE_TYPE.TABLE, df)

@@ -616,8 +616,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_primary_keys"):
                     primary_key_metadata = table_class.meta_get_primary_keys(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(primary_key_metadata)], ignore_index=True)
-            except Exception as e:
-                logger.error(f"Error retrieving primary keys for table {table_name}: {e}")
+            except Exception:
+                logger.exception(f"Error retrieving primary keys for table {table_name}:")

         return Response(RESPONSE_TYPE.TABLE, df)

@@ -641,8 +641,8 @@ class MetaAPIHandler(APIHandler):
                     table_name, all_tables=table_names if table_names else all_tables, **kwargs
                 )
                 df = pd.concat([df, pd.DataFrame(foreign_key_metadata)], ignore_index=True)
-            except Exception as e:
-                logger.error(f"Error retrieving foreign keys for table {table_name}: {e}")
+            except Exception:
+                logger.exception(f"Error retrieving foreign keys for table {table_name}:")

         return Response(RESPONSE_TYPE.TABLE, df)