MindsDB 25.9.1.2__py3-none-any.whl → 25.9.2.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
  __title__ = "MindsDB"
  __package_name__ = "mindsdb"
- __version__ = "25.9.1.2"
+ __version__ = "25.9.2.0a1"
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
  __email__ = "jorge@mindsdb.com"
  __author__ = "MindsDB Inc"
@@ -17,7 +17,6 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
      TableField,
      VectorStoreHandler,
  )
- from mindsdb.interfaces.storage.model_fs import HandlerStorage
  from mindsdb.utilities import log

  logger = log.getLogger(__name__)
@@ -55,7 +54,7 @@ class ChromaDBHandler(VectorStoreHandler):

      def __init__(self, name: str, **kwargs):
          super().__init__(name)
-         self.handler_storage = HandlerStorage(kwargs.get("integration_id"))
+         self.handler_storage = kwargs["handler_storage"]
          self._client = None
          self.persist_directory = None
          self.is_connected = False
@@ -73,8 +72,6 @@ class ChromaDBHandler(VectorStoreHandler):
              "hnsw:space": config.distance,
          }

-         self.connect()
-
      def validate_connection_parameters(self, name, **kwargs):
          """
          Validate the connection parameters.
@@ -88,7 +85,7 @@ class ChromaDBHandler(VectorStoreHandler):
          if config.persist_directory:
              if os.path.isabs(config.persist_directory):
                  self.persist_directory = config.persist_directory
-             elif not self.handler_storage.is_temporal:
+             else:
                  # get full persistence directory from handler storage
                  self.persist_directory = self.handler_storage.folder_get(config.persist_directory)
                  self._use_handler_storage = True
@@ -149,6 +146,7 @@ class ChromaDBHandler(VectorStoreHandler):
          need_to_close = self.is_connected is False

          try:
+             self.connect()
              self._client.heartbeat()
              response_code.success = True
          except Exception as e:
@@ -233,6 +231,7 @@ class ChromaDBHandler(VectorStoreHandler):
          offset: int = None,
          limit: int = None,
      ) -> pd.DataFrame:
+         self.connect()
          collection = self._client.get_collection(table_name)
          filters = self._translate_metadata_condition(conditions)

@@ -399,6 +398,7 @@ class ChromaDBHandler(VectorStoreHandler):
          Insert/Upsert data into ChromaDB collection.
          If records with same IDs exist, they will be updated.
          """
+         self.connect()
          collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata)

          # Convert metadata from string to dict if needed
@@ -449,6 +449,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Update data in the ChromaDB database.
          """
+         self.connect()
          collection = self._client.get_collection(table_name)

          # drop columns with all None values
@@ -466,6 +467,7 @@ class ChromaDBHandler(VectorStoreHandler):
          self._sync()

      def delete(self, table_name: str, conditions: List[FilterCondition] = None):
+         self.connect()
          filters = self._translate_metadata_condition(conditions)
          # get id filters
          id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] or None
@@ -480,6 +482,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Create a collection with the given name in the ChromaDB database.
          """
+         self.connect()
          self._client.create_collection(
              table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata
          )
@@ -489,6 +492,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Delete a collection from the ChromaDB database.
          """
+         self.connect()
          try:
              self._client.delete_collection(table_name)
              self._sync()
@@ -502,6 +506,7 @@ class ChromaDBHandler(VectorStoreHandler):
          """
          Get the list of collections in the ChromaDB database.
          """
+         self.connect()
          collections = self._client.list_collections()
          collections_name = pd.DataFrame(
              columns=["table_name"],
@@ -511,6 +516,7 @@ class ChromaDBHandler(VectorStoreHandler):

      def get_columns(self, table_name: str) -> HandlerResponse:
          # check if collection exists
+         self.connect()
          try:
              _ = self._client.get_collection(table_name)
          except ValueError:
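
The ChromaDB hunks above drop the eager `self.connect()` from `__init__` and instead call `self.connect()` at the top of every operation (check_connection, select, insert/upsert, update, delete, create_table, drop_table, get_tables, get_columns). A minimal sketch of that lazy-connect pattern, assuming chromadb >= 0.4's `PersistentClient` API; the class below is illustrative, not MindsDB code:

```python
# Illustrative lazy-connect sketch (not MindsDB code); assumes chromadb >= 0.4.
import chromadb


class LazyChromaClient:
    def __init__(self, persist_directory: str):
        # No connection is opened here; construction stays cheap and side-effect free.
        self.persist_directory = persist_directory
        self._client = None

    def connect(self):
        # Idempotent: build a client on first use, reuse it afterwards.
        if self._client is None:
            self._client = chromadb.PersistentClient(path=self.persist_directory)
        return self._client

    def get_tables(self) -> list[str]:
        self.connect()  # every public operation ensures a client first
        return [c.name for c in self._client.list_collections()]
```
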
@@ -114,7 +114,7 @@ class OpenAIHandler(BaseMLEngine):
          except NotFoundError:
              pass
          except AuthenticationError as e:
-             if e.body["code"] == "invalid_api_key":
+             if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key":
                  raise Exception("Invalid api key")
              raise Exception(f"Something went wrong: {e}")

@@ -17,7 +17,9 @@ from mindsdb_sql_parser.ast import (
      Delete,
      Update,
      Function,
+     DropTables,
  )
+ from mindsdb_sql_parser.ast.base import ASTNode
  from pgvector.psycopg import register_vector

  from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -116,9 +118,22 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
              return Response(RESPONSE_TYPE.OK)
          return super().get_tables()

-     def native_query(self, query, params=None) -> Response:
+     def query(self, query: ASTNode) -> Response:
+         # Option to drop table of shared pgvector connection
+         if isinstance(query, DropTables):
+             query.tables = [self._check_table(table.parts[-1]) for table in query.tables]
+             query_str, params = self.renderer.get_exec_params(query, with_failback=True)
+             return self.native_query(query_str, params, no_restrict=True)
+         return super().query(query)
+
+     def native_query(self, query, params=None, no_restrict=False) -> Response:
+         """
+         Altered `native_query` method of postgres handler.
+         Restrict usage of native query from executor with shared pg vector connection
+         Exceptions: if it is used by pgvector itself (with no_restrict = True)
+         """
          # Prevent execute native queries
-         if self._is_shared_db:
+         if self._is_shared_db and not no_restrict:
              return Response(RESPONSE_TYPE.OK)
          return super().native_query(query, params=params)

@@ -550,6 +565,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):

      def create_table(self, table_name: str):
          """Create a table with a vector column."""
+
+         table_name = self._check_table(table_name)
+
          with self.connection.cursor() as cur:
              # For sparse vectors, use sparsevec type
              vector_column_type = "sparsevec" if self._is_sparse else "vector"
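
Taken together, the pgvector hunks route `DROP TABLE` on a shared pgvector connection through the new `query()` override, while `native_query()` refuses raw SQL on a shared database unless the internal `no_restrict=True` escape hatch is used (the PostgresHandler change further below passes exactly that flag, and its base signature gains `**kwargs` so the call works whichever override it resolves to). A stripped-down sketch of the gating pattern with illustrative class names:

```python
# Hedged sketch of the no_restrict gating pattern; class names are illustrative.
class BaseSQLHandler:
    def native_query(self, query, params=None, **kwargs):
        # The base accepts **kwargs so internal callers may pass no_restrict
        # regardless of which subclass the call resolves to.
        return f"executed: {query}"


class SharedVectorHandler(BaseSQLHandler):
    def __init__(self, is_shared_db: bool):
        self._is_shared_db = is_shared_db

    def native_query(self, query, params=None, no_restrict=False):
        # User-issued raw SQL is refused on the shared connection;
        # internal calls opt out with no_restrict=True.
        if self._is_shared_db and not no_restrict:
            return "skipped"
        return super().native_query(query, params=params)


handler = SharedVectorHandler(is_shared_db=True)
print(handler.native_query("DROP TABLE docs"))                    # skipped
print(handler.native_query("DROP TABLE docs", no_restrict=True))  # executed: DROP TABLE docs
```
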
@@ -279,7 +279,7 @@ class PostgresHandler(MetaDatabaseHandler):
          df.columns = columns

      @profiler.profile()
-     def native_query(self, query: str, params=None) -> Response:
+     def native_query(self, query: str, params=None, **kwargs) -> Response:
          """
          Executes a SQL query on the PostgreSQL database and returns the result.

@@ -466,7 +466,10 @@ class PostgresHandler(MetaDatabaseHandler):
              AND
                  table_schema = {schema_name}
          """
-         result = self.native_query(query)
+         # If it is used by pgvector handler - `native_query` method of pgvector handler will be used
+         # in that case if shared pgvector db is used - `native_query` will be skipped (return empty result)
+         # `no_restrict` flag allows to execute native query, and it will call `native_query` of postgres handler
+         result = self.native_query(query, no_restrict=True)
          result.to_columns_table_response(map_type_fn=_map_type)
          return result

@@ -547,7 +547,7 @@ class KnowledgeBaseTable:
          if processed_chunks:
              content.value = processed_chunks[0].content

-         query.update_columns[emb_col] = Constant(self._content_to_embeddings(content))
+         query.update_columns[emb_col] = Constant(self._content_to_embeddings(content.value))

          if "metadata" not in query.update_columns:
              query.update_columns["metadata"] = Constant({})
@@ -1110,6 +1110,9 @@ class KnowledgeBaseController:
          model_record = db.Predictor.query.get(model["id"])
          embedding_model_id = model_record.id

+         if model_record.learn_args.get("using", {}).get("sparse"):
+             is_sparse = True
+
          # if params.get("reranking_model", {}) is bool and False we evaluate it to empty dictionary
          reranking_model_params = params.get("reranking_model", {})

@@ -1138,7 +1141,6 @@ class KnowledgeBaseController:
          # Add sparse vector support for pgvector
          vector_db_params = {}
          # Check both explicit parameter and model configuration
-         is_sparse = is_sparse or model_record.learn_args.get("using", {}).get("sparse")
          if is_sparse:
              vector_db_params["is_sparse"] = True
          if vector_size is not None:
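
The two knowledge-base hunks move the sparse-embedding check ahead of the vector-store parameter assembly, so `is_sparse` is already known when `vector_db_params` is built and, downstream, when pgvector's `create_table` picks `sparsevec` over `vector`. A small sketch of the same lookup; the `learn_args` value is made up:

```python
# Illustrative only: learn_args mimics the structure read from the model record.
learn_args = {"using": {"sparse": True, "model_name": "my-sparse-embedder"}}

is_sparse = False
if learn_args.get("using", {}).get("sparse"):
    is_sparse = True

vector_db_params = {}
if is_sparse:
    vector_db_params["is_sparse"] = True  # later selects the sparsevec column type

print(vector_db_params)  # {'is_sparse': True}
```
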
@@ -11,18 +11,16 @@ from .fs import RESOURCE_GROUP, FileStorageFactory, SERVICE_FILES_NAMES
  from .json import get_json_storage, get_encrypted_json_storage


- JSON_STORAGE_FILE = 'json_storage.json'
+ JSON_STORAGE_FILE = "json_storage.json"


  class ModelStorage:
      """
      This class deals with all model-related storage requirements, from setting status to storing artifacts.
      """
+
      def __init__(self, predictor_id):
-         storageFactory = FileStorageFactory(
-             resource_group=RESOURCE_GROUP.PREDICTOR,
-             sync=True
-         )
+         storageFactory = FileStorageFactory(resource_group=RESOURCE_GROUP.PREDICTOR, sync=True)
          self.fileStorage = storageFactory(predictor_id)
          self.predictor_id = predictor_id

@@ -43,15 +41,12 @@ class ModelStorage:
          """
          model_record = db.Predictor.query.get(self.predictor_id)
          if check_exists is True and model_record is None:
-             raise KeyError('Model does not exists')
+             raise KeyError("Model does not exists")
          return model_record

      def get_info(self):
          rec = self._get_model_record(self.predictor_id)
-         return dict(status=rec.status,
-                     to_predict=rec.to_predict,
-                     data=rec.data,
-                     learn_args=rec.learn_args)
+         return dict(status=rec.status, to_predict=rec.to_predict, data=rec.data, learn_args=rec.learn_args)

      def status_set(self, status, status_info=None):
          rec = self._get_model_record(self.predictor_id)
@@ -95,67 +90,52 @@ class ModelStorage:

      def folder_get(self, name):
          # pull folder and return path
-         name = name.lower().replace(' ', '_')
-         name = re.sub(r'([^a-z^A-Z^_\d]+)', '_', name)
+         name = name.lower().replace(" ", "_")
+         name = re.sub(r"([^a-z^A-Z^_\d]+)", "_", name)

          self.fileStorage.pull_path(name)
          return str(self.fileStorage.get_path(name))

      def folder_sync(self, name):
          # sync abs path
-         name = name.lower().replace(' ', '_')
-         name = re.sub(r'([^a-z^A-Z^_\d]+)', '_', name)
+         name = name.lower().replace(" ", "_")
+         name = re.sub(r"([^a-z^A-Z^_\d]+)", "_", name)

          self.fileStorage.push_path(name)

-     def file_list(self):
-         ...
+     def file_list(self): ...

-     def file_del(self, name):
-         ...
+     def file_del(self, name): ...

      # jsons

      def json_set(self, name, data):
-         json_storage = get_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
-         )
+         json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR)
          return json_storage.set(name, data)

      def encrypted_json_set(self, name: str, data: dict) -> None:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
+             resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR
          )
          return json_storage.set(name, data)

      def json_get(self, name):
-         json_storage = get_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
-         )
+         json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR)
          return json_storage.get(name)

      def encrypted_json_get(self, name: str) -> dict:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
+             resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR
          )
          return json_storage.get(name)

-     def json_list(self):
-         ...
+     def json_list(self): ...

-     def json_del(self, name):
-         ...
+     def json_del(self, name): ...

      def delete(self):
          self.fileStorage.delete()
-         json_storage = get_json_storage(
-             resource_id=self.predictor_id,
-             resource_group=RESOURCE_GROUP.PREDICTOR
-         )
+         json_storage = get_json_storage(resource_id=self.predictor_id, resource_group=RESOURCE_GROUP.PREDICTOR)
          json_storage.clean()

@@ -164,29 +144,26 @@ class HandlerStorage:
      This class deals with all handler-related storage requirements, from storing metadata to synchronizing folders
      across instances.
      """
+
      def __init__(self, integration_id: int, root_dir: str = None, is_temporal=False):
          args = {}
          if root_dir is not None:
-             args['root_dir'] = root_dir
-         storageFactory = FileStorageFactory(
-             resource_group=RESOURCE_GROUP.INTEGRATION,
-             sync=False,
-             **args
-         )
+             args["root_dir"] = root_dir
+         storageFactory = FileStorageFactory(resource_group=RESOURCE_GROUP.INTEGRATION, sync=False, **args)
          self.fileStorage = storageFactory(integration_id)
          self.integration_id = integration_id
          self.is_temporal = is_temporal
          # do not sync with remote storage

      def __convert_name(self, name):
-         name = name.lower().replace(' ', '_')
-         return re.sub(r'([^a-z^A-Z^_\d]+)', '_', name)
+         name = name.lower().replace(" ", "_")
+         return re.sub(r"([^a-z^A-Z^_\d]+)", "_", name)

      def is_empty(self):
-         """ check if storage directory is empty
+         """check if storage directory is empty

-             Returns:
-                 bool: true if dir is empty
+         Returns:
+             bool: true if dir is empty
          """
          for path in self.fileStorage.folder_path.iterdir():
              if path.is_file() and path.name in SERVICE_FILES_NAMES:
@@ -221,19 +198,17 @@ class HandlerStorage:
          if not self.is_temporal:
              self.fileStorage.push_path(name)

-     def file_list(self):
-         ...
+     def file_list(self): ...

-     def file_del(self, name):
-         ...
+     def file_del(self, name): ...

      # folder

      def folder_get(self, name):
-         ''' Copies folder from remote to local file system and returns its path
+         """Copies folder from remote to local file system and returns its path

          :param name: name of the folder
-         '''
+         """
          name = self.__convert_name(name)

          self.fileStorage.pull_path(name)
@@ -249,38 +224,28 @@ class HandlerStorage:
      # jsons

      def json_set(self, name, content):
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)
          return json_storage.set(name, content)

      def encrypted_json_set(self, name: str, content: dict) -> None:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
+             resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION
          )
          return json_storage.set(name, content)

      def json_get(self, name):
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)
          return json_storage.get(name)

      def encrypted_json_get(self, name: str) -> dict:
          json_storage = get_encrypted_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
+             resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION
          )
          return json_storage.get(name)

-     def json_list(self):
-         ...
+     def json_list(self): ...

-     def json_del(self, name):
-         ...
+     def json_del(self, name): ...

      def export_files(self) -> bytes:
          json_storage = self.export_json_storage()
@@ -288,11 +253,11 @@ class HandlerStorage:
          if self.is_empty() and not json_storage:
              return None

-         folder_path = self.folder_get('')
+         folder_path = self.folder_get("")

          zip_fd = io.BytesIO()

-         with zipfile.ZipFile(zip_fd, 'w', zipfile.ZIP_DEFLATED) as zipf:
+         with zipfile.ZipFile(zip_fd, "w", zipfile.ZIP_DEFLATED) as zipf:
              for root, dirs, files in os.walk(folder_path):
                  for file_name in files:
                      if file_name in SERVICE_FILES_NAMES:
@@ -309,14 +274,13 @@ class HandlerStorage:
          return zip_fd.read()

      def import_files(self, content: bytes):
-
-         folder_path = self.folder_get('')
+         folder_path = self.folder_get("")

          zip_fd = io.BytesIO()
          zip_fd.write(content)
          zip_fd.seek(0)

-         with zipfile.ZipFile(zip_fd, 'r') as zip_ref:
+         with zipfile.ZipFile(zip_fd, "r") as zip_ref:
              for name in zip_ref.namelist():
                  # If JSON storage file is in the zip file, import the content to the JSON storage.
                  # Thereafter, remove the file from the folder.
@@ -327,38 +291,36 @@ class HandlerStorage:
                  else:
                      zip_ref.extract(name, folder_path)

-         self.folder_sync('')
+         self.folder_sync("")

      def export_json_storage(self) -> list[dict]:
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)

          records = []
          for record in json_storage.get_all_records():
              record_dict = record.to_dict()
-             if record_dict.get('encrypted_content'):
-                 record_dict['encrypted_content'] = record_dict['encrypted_content'].decode()
+             if record_dict.get("encrypted_content"):
+                 record_dict["encrypted_content"] = record_dict["encrypted_content"].decode()
              records.append(record_dict)

          return records

      def import_json_storage(self, records: bytes) -> None:
-         json_storage = get_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
-         )
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)

          encrypted_json_storage = get_encrypted_json_storage(
-             resource_id=self.integration_id,
-             resource_group=RESOURCE_GROUP.INTEGRATION
+             resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION
          )

          records = json.loads(records.decode())

          for record in records:
-             if record['encrypted_content']:
-                 encrypted_json_storage.set_str(record['name'], record['encrypted_content'])
+             if record["encrypted_content"]:
+                 encrypted_json_storage.set_str(record["name"], record["encrypted_content"])
              else:
-                 json_storage.set(record['name'], record['content'])
+                 json_storage.set(record["name"], record["content"])
+
+     def delete(self):
+         self.fileStorage.delete()
+         json_storage = get_json_storage(resource_id=self.integration_id, resource_group=RESOURCE_GROUP.INTEGRATION)
+         json_storage.clean()
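
The new `HandlerStorage.delete()` mirrors `ModelStorage.delete()`: it removes the handler's synced files and clears its JSON storage in one call. A hedged usage sketch; the integration id is made up:

```python
# Hypothetical cleanup when an integration is dropped; the id 42 is invented.
from mindsdb.interfaces.storage.model_fs import HandlerStorage

storage = HandlerStorage(integration_id=42)
storage.delete()  # deletes stored files and cleans the JSON storage records
```
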
@@ -47,7 +47,7 @@ def _compile_interval(element, compiler, **kw):
      if items[1].upper().endswith("S"):
          items[1] = items[1][:-1]

-     if compiler.dialect.driver in ["snowflake"] or compiler.dialect.name in ["postgresql"]:
+     if getattr(compiler.dialect, "driver", None) == "snowflake" or compiler.dialect.name == "postgresql":
          # quote all
          args = " ".join(map(str, items))
          args = f"'{args}'"
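
The final hunk replaces the direct `compiler.dialect.driver` lookup with `getattr(..., None)`, so a dialect object without a `driver` attribute no longer raises. A tiny sketch of the defensive form, using a made-up dialect object:

```python
# Illustrative stand-in for a dialect object that only exposes `name`.
class FakeDialect:
    name = "postgresql"


dialect = FakeDialect()

# The old form would raise AttributeError here; the new form falls back to None
# and still matches on the dialect name.
quote_all = getattr(dialect, "driver", None) == "snowflake" or dialect.name == "postgresql"
print(quote_all)  # True
```
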