MindsDB 25.6.4.0__py3-none-any.whl → 25.7.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (46):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +8 -6
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  4. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  6. mindsdb/api/executor/planner/query_prepare.py +68 -87
  7. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  8. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  9. mindsdb/api/http/namespaces/file.py +49 -24
  10. mindsdb/api/mcp/start.py +45 -31
  11. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  12. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  13. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  14. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  15. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  16. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  17. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  18. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  19. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
  20. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  21. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  22. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  23. mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
  24. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  25. mindsdb/interfaces/agents/agents_controller.py +29 -9
  26. mindsdb/interfaces/agents/langchain_agent.py +7 -5
  27. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  28. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  29. mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
  30. mindsdb/interfaces/knowledge_base/controller.py +115 -89
  31. mindsdb/interfaces/knowledge_base/evaluate.py +16 -4
  32. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  33. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  34. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  35. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  36. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
  37. mindsdb/interfaces/skills/sql_agent.py +181 -130
  38. mindsdb/interfaces/storage/db.py +9 -7
  39. mindsdb/utilities/config.py +12 -1
  40. mindsdb/utilities/exception.py +47 -7
  41. mindsdb/utilities/security.py +54 -11
  42. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +248 -262
  43. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +46 -45
  44. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
  45. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
  46. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py
@@ -1,6 +1,6 @@
 import os
 import json
-from typing import Dict, List, Union, Literal
+from typing import Dict, List, Literal, Tuple
 from urllib.parse import urlparse
 
 import pandas as pd
@@ -16,7 +16,7 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
     FilterCondition,
     VectorStoreHandler,
     DistanceFunction,
-    TableField
+    TableField,
 )
 from mindsdb.utilities import log
 from mindsdb.utilities.profiler import profiler
@@ -32,13 +32,12 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
     name = "pgvector"
 
     def __init__(self, name: str, **kwargs):
-
         super().__init__(name=name, **kwargs)
         self._is_shared_db = False
         self._is_vector_registered = False
         # we get these from the connection args on PostgresHandler parent
-        self._is_sparse = self.connection_args.get('is_sparse', False)
-        self._vector_size = self.connection_args.get('vector_size', None)
+        self._is_sparse = self.connection_args.get("is_sparse", False)
+        self._vector_size = self.connection_args.get("vector_size", None)
 
         if self._is_sparse:
             if not self._vector_size:
@@ -48,20 +47,20 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
             distance_op = "<#>"
 
         else:
-            distance_op = '<=>'
-            if 'distance' in self.connection_args:
+            distance_op = "<=>"
+            if "distance" in self.connection_args:
                 distance_ops = {
-                    'l1': '<+>',
-                    'l2': '<->',
-                    'ip': '<#>',  # inner product
-                    'cosine': '<=>',
-                    'hamming': '<~>',
-                    'jaccard': '<%>'
+                    "l1": "<+>",
+                    "l2": "<->",
+                    "ip": "<#>",  # inner product
+                    "cosine": "<=>",
+                    "hamming": "<~>",
+                    "jaccard": "<%>",
                 }
 
-                distance_op = distance_ops.get(self.connection_args['distance'])
+                distance_op = distance_ops.get(self.connection_args["distance"])
                 if distance_op is None:
-                    raise ValueError(f'Wrong distance type. Allowed options are {list(distance_ops.keys())}')
+                    raise ValueError(f"Wrong distance type. Allowed options are {list(distance_ops.keys())}")
 
         self.distance_op = distance_op
         self.connect()
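
Note: the quoting changes above are cosmetic; the `distance` connection arg still selects the pgvector operator the same way. A minimal sketch of that selection, mirroring the handler's `__init__` (the helper function and sample args are illustrative, not part of the package):

    # Mapping copied from the diff above; sparse vectors always use inner product.
    distance_ops = {
        "l1": "<+>",
        "l2": "<->",
        "ip": "<#>",  # inner product
        "cosine": "<=>",
        "hamming": "<~>",
        "jaccard": "<%>",
    }

    def pick_distance_op(connection_args: dict, is_sparse: bool = False) -> str:
        if is_sparse:
            return "<#>"
        op = distance_ops.get(connection_args.get("distance", "cosine"))
        if op is None:
            raise ValueError(f"Wrong distance type. Allowed options are {list(distance_ops)}")
        return op

    assert pick_distance_op({"distance": "l2"}) == "<->"
    assert pick_distance_op({}) == "<=>"  # cosine is the default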
@@ -72,26 +71,26 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
 
         """
         distance_ops_to_metric_type_map = {
-            '<->': 'vector_l2_ops',
-            '<#>': 'vector_ip_ops',
-            '<=>': 'vector_cosine_ops',
-            '<+>': 'vector_l1_ops',
-            '<~>': 'bit_hamming_ops',
-            '<%>': 'bit_jaccard_ops'
+            "<->": "vector_l2_ops",
+            "<#>": "vector_ip_ops",
+            "<=>": "vector_cosine_ops",
+            "<+>": "vector_l1_ops",
+            "<~>": "bit_hamming_ops",
+            "<%>": "bit_jaccard_ops",
         }
-        return distance_ops_to_metric_type_map.get(self.distance_op, 'vector_cosine_ops')
+        return distance_ops_to_metric_type_map.get(self.distance_op, "vector_cosine_ops")
 
     def _make_connection_args(self):
-        cloud_pgvector_url = os.environ.get('KB_PGVECTOR_URL')
+        cloud_pgvector_url = os.environ.get("KB_PGVECTOR_URL")
         # if no connection args and shared pg vector defined - use it
         if len(self.connection_args) == 0 and cloud_pgvector_url is not None:
             result = urlparse(cloud_pgvector_url)
             self.connection_args = {
-                'host': result.hostname,
-                'port': result.port,
-                'user': result.username,
-                'password': result.password,
-                'database': result.path[1:]
+                "host": result.hostname,
+                "port": result.port,
+                "user": result.username,
+                "password": result.password,
+                "database": result.path[1:],
             }
             self._is_shared_db = True
         return super()._make_connection_args()
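
Note: the shared-DB fallback in `_make_connection_args` splits `KB_PGVECTOR_URL` into standard Postgres connection args. A self-contained illustration with a made-up URL:

    from urllib.parse import urlparse

    result = urlparse("postgresql://kb_user:secret@pg.example.com:5432/vectors")
    connection_args = {
        "host": result.hostname,      # 'pg.example.com'
        "port": result.port,          # 5432
        "user": result.username,      # 'kb_user'
        "password": result.password,  # 'secret'
        "database": result.path[1:],  # 'vectors' (path minus the leading '/')
    }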
@@ -132,9 +131,7 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
 
         except psycopg.Error as e:
             self.connection.rollback()
-            logger.error(
-                f"Error loading pg_vector extension, ensure you have installed it before running, {e}!"
-            )
+            logger.error(f"Error loading pg_vector extension, ensure you have installed it before running, {e}!")
             raise
 
         # register vector type with psycopg2 connection
@@ -143,19 +140,33 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
 
         return self.connection
 
+    def add_full_text_index(self, table_name: str, column_name: str) -> Response:
+        """
+        Add a full text index to the specified column of the table.
+        Args:
+            table_name (str): Name of the table to add the index to.
+            column_name (str): Name of the column to add the index to.
+        Returns:
+            Response: Response object indicating success or failure.
+        """
+        table_name = self._check_table(table_name)
+        query = f"CREATE INDEX IF NOT EXISTS {table_name}_{column_name}_fts_idx ON {table_name} USING gin(to_tsvector('english', {column_name}))"
+        self.raw_query(query)
+        return Response(RESPONSE_TYPE.OK)
+
     @staticmethod
-    def _translate_conditions(conditions: List[FilterCondition]) -> Union[dict, None]:
+    def _translate_conditions(conditions: List[FilterCondition]) -> Tuple[List[dict], dict]:
         """
         Translate filter conditions to a dictionary
         """
 
         if conditions is None:
-            return {}
+            conditions = []
 
-        filter_conditions = {}
+        filter_conditions = []
+        embedding_condition = None
 
         for condition in conditions:
-
             parts = condition.column.split(".")
             key = parts[0]
             # converts 'col.el1.el2' to col->'el1'->>'el2'
@@ -167,12 +178,25 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
             # last element
             key += f" ->> '{parts[-1]}'"
 
-            filter_conditions[key] = {
+            type_cast = None
+            if isinstance(condition.value, int):
+                type_cast = "int"
+            elif isinstance(condition.value, float):
+                type_cast = "float"
+            if type_cast is not None:
+                key = f"({key})::{type_cast}"
+
+            item = {
+                "name": key,
                 "op": condition.op.value,
                 "value": condition.value,
             }
+            if key == "embeddings":
+                embedding_condition = item
+            else:
+                filter_conditions.append(item)
 
-        return filter_conditions
+        return filter_conditions, embedding_condition
 
     @staticmethod
     def _construct_where_clause(filter_conditions=None):
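
Note: `_translate_conditions` changes its contract in this hunk: it used to return a single dict (or `{}`), and now returns a `(filter_conditions, embedding_condition)` tuple, casting numeric JSON values so Postgres compares them as numbers. An illustration of the new behavior (assuming the `FilterOperator` enum from `vectordatabase_handler`; values are made up):

    from mindsdb.integrations.libs.vectordatabase_handler import FilterCondition, FilterOperator

    conditions = [
        FilterCondition(column="metadata.year", op=FilterOperator.EQUAL, value=2024),
        FilterCondition(column="embeddings", op=FilterOperator.EQUAL, value=[0.1, 0.2, 0.3]),
    ]
    filters, embedding = PgVectorHandler._translate_conditions(conditions)
    # filters   -> [{"name": "(metadata ->> 'year')::int", "op": "=", "value": 2024}]
    # embedding -> {"name": "embeddings", "op": "=", "value": [0.1, 0.2, 0.3]}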
@@ -184,15 +208,18 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
 
         where_clauses = []
 
-        for key, value in filter_conditions.items():
-            if key == "embeddings":
-                continue
-            if value['op'].lower() in ('in', 'not in'):
-                values = list(repr(i) for i in value['value'])
-                value['value'] = '({})'.format(', '.join(values))
+        for item in filter_conditions:
+            key = item["name"]
+
+            if item["op"].lower() in ("in", "not in"):
+                values = list(repr(i) for i in item["value"])
+                item["value"] = "({})".format(", ".join(values))
             else:
-                value['value'] = repr(value['value'])
-            where_clauses.append(f'{key} {value["op"]} {value["value"]}')
+                if item["value"] is None:
+                    item["value"] = "null"
+                else:
+                    item["value"] = repr(item["value"])
+            where_clauses.append(f"{key} {item['op']} {item['value']}")
 
         if len(where_clauses) > 1:
             return f"WHERE {' AND '.join(where_clauses)}"
@@ -207,7 +234,6 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         offset_clause: str,
         limit_clause: str,
     ) -> str:
-
         return f"{where_clause} {offset_clause} {limit_clause}"
 
     def _build_select_query(
@@ -225,10 +251,7 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         offset_clause = f"OFFSET {offset}" if offset else ""
 
         # translate filter conditions to dictionary
-        filter_conditions = self._translate_conditions(conditions)
-
-        # check if search vector is in filter conditions
-        embedding_search = filter_conditions.get("embeddings", None)
+        filter_conditions, embedding_search = self._translate_conditions(conditions)
 
         # given filter conditions, construct where clause
         where_clause = self._construct_where_clause(filter_conditions)
@@ -243,47 +266,41 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
             else:
                 modified_columns.append(col)
         else:
-            modified_columns = ['id', 'content', 'embeddings', 'metadata']
+            modified_columns = ["id", "content", "embeddings", "metadata"]
             has_distance = True
 
-        targets = ', '.join(modified_columns)
+        targets = ", ".join(modified_columns)
 
-        if filter_conditions:
+        if embedding_search:
+            search_vector = embedding_search["value"]
 
-            if embedding_search:
-                search_vector = filter_conditions["embeddings"]["value"]
-                filter_conditions.pop("embeddings")
+            if self._is_sparse:
+                # Convert dict to sparse vector if needed
+                if isinstance(search_vector, dict):
+                    from pgvector.utils import SparseVector
 
-                if self._is_sparse:
-                    # Convert dict to sparse vector if needed
-                    if isinstance(search_vector, dict):
-                        from pgvector.utils import SparseVector
-                        embedding = SparseVector(search_vector, self._vector_size)
-                        search_vector = embedding.to_text()
-                else:
-                    # Convert list to vector string if needed
-                    if isinstance(search_vector, list):
-                        search_vector = f"[{','.join(str(x) for x in search_vector)}]"
+                    embedding = SparseVector(search_vector, self._vector_size)
+                    search_vector = embedding.to_text()
+            else:
+                # Convert list to vector string if needed
+                if isinstance(search_vector, list):
+                    search_vector = f"[{','.join(str(x) for x in search_vector)}]"
 
-                # Calculate distance as part of the query if needed
-                if has_distance:
-                    targets = f"{targets}, (embeddings {self.distance_op} '{search_vector}') as distance"
+            # Calculate distance as part of the query if needed
+            if has_distance:
+                targets = f"{targets}, (embeddings {self.distance_op} '{search_vector}') as distance"
 
-                return f"SELECT {targets} FROM {table_name} {where_clause} ORDER BY embeddings {self.distance_op} '{search_vector}' ASC {limit_clause} {offset_clause} "
-
-            else:
-                # if filter conditions, return rows that satisfy the conditions
-                return f"SELECT {targets} FROM {table_name} {where_clause} {limit_clause} {offset_clause}"
+            return f"SELECT {targets} FROM {table_name} {where_clause} ORDER BY embeddings {self.distance_op} '{search_vector}' ASC {limit_clause} {offset_clause} "
 
         else:
-            # if no filter conditions, return all rows
-            return f"SELECT {targets} FROM {table_name} {limit_clause} {offset_clause}"
+            # if filter conditions, return rows that satisfy the conditions
+            return f"SELECT {targets} FROM {table_name} {where_clause} {limit_clause} {offset_clause}"
 
     def _check_table(self, table_name: str):
         # Apply namespace for a user
         if self._is_shared_db:
-            company_id = ctx.company_id or 'x'
-            return f't_{company_id}_{table_name}'
+            company_id = ctx.company_id or "x"
+            return f"t_{company_id}_{table_name}"
         return table_name
 
     def select(
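
Note: after this hunk, `_build_select_query` branches directly on whether an embeddings condition is present, rather than on whether any filter exists. The shape of the query it emits for a vector search, per the code above (values hypothetical; `<=>` is the cosine operator chosen in `__init__`):

    table_name = "my_table"
    distance_op = "<=>"
    search_vector = "[0.1,0.2,0.3]"
    targets = f"id, content, embeddings, metadata, (embeddings {distance_op} '{search_vector}') as distance"
    query = (
        f"SELECT {targets} FROM {table_name} "
        f"WHERE (metadata ->> 'year')::int = 2024 "
        f"ORDER BY embeddings {distance_op} '{search_vector}' ASC LIMIT 10"
    )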
@@ -318,9 +335,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         query: str = None,
         metadata: Dict[str, str] = None,
         distance_function=DistanceFunction.COSINE_DISTANCE,
-        **kwargs
+        **kwargs,
     ) -> pd.DataFrame:
-        '''
+        """
         Executes a hybrid search, combining semantic search and one or both of keyword/metadata search.
 
         For insight on the query construction, see: https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search.
@@ -340,23 +357,25 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
 
         Returns:
             df(pd.DataFrame): Hybrid search result, sorted by hybrid search rank
-        '''
+        """
         if query is None and metadata is None:
-            raise ValueError('Must provide at least one of: query for keyword search, or metadata filters. For only embeddings search, use normal search instead.')
-
-        id_column_name = kwargs.get('id_column_name', 'id')
-        content_column_name = kwargs.get('content_column_name', 'content')
-        embeddings_column_name = kwargs.get('embeddings_column_name', 'embeddings')
-        metadata_column_name = kwargs.get('metadata_column_name', 'metadata')
+            raise ValueError(
+                "Must provide at least one of: query for keyword search, or metadata filters. For only embeddings search, use normal search instead."
+            )
+
+        id_column_name = kwargs.get("id_column_name", "id")
+        content_column_name = kwargs.get("content_column_name", "content")
+        embeddings_column_name = kwargs.get("embeddings_column_name", "embeddings")
+        metadata_column_name = kwargs.get("metadata_column_name", "metadata")
         # Filter by given metadata for semantic search & full text search CTEs, if present.
-        where_clause = ' WHERE '
+        where_clause = " WHERE "
         if metadata is None:
-            where_clause = ''
+            where_clause = ""
             metadata = {}
         for i, (k, v) in enumerate(metadata.items()):
             where_clause += f"{metadata_column_name}->>'{k}' = '{v}'"
             if i < len(metadata.items()) - 1:
-                where_clause += ' AND '
+                where_clause += " AND "
 
         # See https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search.
         #
@@ -381,47 +400,51 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         # Or, if we are only doing metadata search, we leave out the JOIN & full text search CTEs.
         #
         # We calculate the final "hybrid" rank by summing the reciprocals of the ranks from each individual CTE.
-        semantic_search_cte = f'''WITH semantic_search AS (
+        semantic_search_cte = f"""WITH semantic_search AS (
             SELECT {id_column_name}, {content_column_name}, {embeddings_column_name},
             RANK () OVER (ORDER BY {embeddings_column_name} {distance_function.value} '{str(embeddings)}') AS rank
             FROM {table_name}{where_clause}
             ORDER BY {embeddings_column_name} {distance_function.value} '{str(embeddings)}'::vector
-        )'''
+        )"""
 
-        full_text_search_cte = ''
+        full_text_search_cte = ""
         if query is not None:
-            ts_vector_clause = f"WHERE to_tsvector('english', {content_column_name}) @@ plainto_tsquery('english', '{query}')"
+            ts_vector_clause = (
+                f"WHERE to_tsvector('english', {content_column_name}) @@ plainto_tsquery('english', '{query}')"
+            )
             if metadata:
-                ts_vector_clause = f"AND to_tsvector('english', {content_column_name}) @@ plainto_tsquery('english', '{query}')"
-            full_text_search_cte = f''',
+                ts_vector_clause = (
+                    f"AND to_tsvector('english', {content_column_name}) @@ plainto_tsquery('english', '{query}')"
+                )
+            full_text_search_cte = f""",
             full_text_search AS (
                 SELECT {id_column_name}, {content_column_name}, {embeddings_column_name},
                 RANK () OVER (ORDER BY ts_rank(to_tsvector('english', {content_column_name}), plainto_tsquery('english', '{query}')) DESC) AS rank
                 FROM {table_name}{where_clause}
                 {ts_vector_clause}
                 ORDER BY ts_rank(to_tsvector('english', {content_column_name}), plainto_tsquery('english', '{query}')) DESC
-            )'''
+            )"""
 
-        hybrid_select = '''
-        SELECT * FROM semantic_search'''
+        hybrid_select = """
+        SELECT * FROM semantic_search"""
         if query is not None:
-            hybrid_select = f'''
+            hybrid_select = f"""
             SELECT
                 COALESCE(semantic_search.{id_column_name}, full_text_search.{id_column_name}) AS id,
                 COALESCE(semantic_search.{content_column_name}, full_text_search.{content_column_name}) AS content,
                 COALESCE(semantic_search.{embeddings_column_name}, full_text_search.{embeddings_column_name}) AS embeddings,
                 COALESCE(1.0 / (1 + semantic_search.rank), 0.0) + COALESCE(1.0 / (1 + full_text_search.rank), 0.0) AS rank
            FROM semantic_search FULL OUTER JOIN full_text_search USING ({id_column_name}) ORDER BY rank DESC;
-        '''
+        """
 
-        full_search_query = f'{semantic_search_cte}{full_text_search_cte}{hybrid_select}'
+        full_search_query = f"{semantic_search_cte}{full_text_search_cte}{hybrid_select}"
         return self.raw_query(full_search_query)
 
     def create_table(self, table_name: str):
         """Create a table with a vector column."""
         with self.connection.cursor() as cur:
             # For sparse vectors, use sparsevec type
-            vector_column_type = 'sparsevec' if self._is_sparse else 'vector'
+            vector_column_type = "sparsevec" if self._is_sparse else "vector"
 
             # Vector size is required for sparse vectors, optional for dense
             if self._is_sparse and not self._vector_size:
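
Note: the hybrid SELECT assembled in the hunk above is a reciprocal rank fusion: each row's final score is 1/(1 + semantic_rank) + 1/(1 + full_text_rank), with COALESCE treating absence from either CTE as a zero contribution. A toy check of that scoring (illustrative only):

    def rrf(semantic_rank=None, full_text_rank=None):
        score = 0.0
        if semantic_rank is not None:
            score += 1.0 / (1 + semantic_rank)
        if full_text_rank is not None:
            score += 1.0 / (1 + full_text_rank)
        return score

    assert rrf(1, 1) == 1.0     # top hit in both searches
    assert rrf(1, None) == 0.5  # found only by semantic search
    assert rrf(3, 1) == 0.75    # 1/4 + 1/2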
@@ -429,8 +452,8 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
 
             # Add vector size specification only if provided
             size_spec = f"({self._vector_size})" if self._vector_size is not None else "()"
-            if vector_column_type == 'vector':
-                size_spec = ''
+            if vector_column_type == "vector":
+                size_spec = ""
 
             cur.execute(f"""
                 CREATE TABLE IF NOT EXISTS {table_name} (
@@ -442,16 +465,14 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
             """)
         self.connection.commit()
 
-    def insert(
-        self, table_name: str, data: pd.DataFrame
-    ):
+    def insert(self, table_name: str, data: pd.DataFrame):
         """
         Insert data into the pgvector table database.
        """
         table_name = self._check_table(table_name)
 
-        if 'metadata' in data.columns:
-            data['metadata'] = data['metadata'].apply(json.dumps)
+        if "metadata" in data.columns:
+            data["metadata"] = data["metadata"].apply(json.dumps)
 
         resp = super().insert(table_name, data)
         if resp.resp_type == RESPONSE_TYPE.ERROR:
@@ -459,9 +480,7 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         if resp.resp_type == RESPONSE_TYPE.TABLE:
             return resp.data_frame
 
-    def update(
-        self, table_name: str, data: pd.DataFrame, key_columns: List[str] = None
-    ):
+    def update(self, table_name: str, data: pd.DataFrame, key_columns: List[str] = None):
         """
         Udate data into the pgvector table database.
         """
@@ -471,43 +490,32 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         update_columns = {}
 
         for col in data.columns:
-            value = Parameter('%s')
+            value = Parameter("%s")
 
             if col in key_columns:
-                cond = BinaryOperation(
-                    op='=',
-                    args=[Identifier(col), value]
-                )
+                cond = BinaryOperation(op="=", args=[Identifier(col), value])
                 if where is None:
                     where = cond
                 else:
-                    where = BinaryOperation(
-                        op='AND',
-                        args=[where, cond]
-                    )
+                    where = BinaryOperation(op="AND", args=[where, cond])
             else:
                 update_columns[col] = value
 
-        query = Update(
-            table=Identifier(table_name),
-            update_columns=update_columns,
-            where=where
-        )
+        query = Update(table=Identifier(table_name), update_columns=update_columns, where=where)
 
         if TableField.METADATA.value in data.columns:
+
             def fnc(v):
                 if isinstance(v, dict):
                     return json.dumps(v)
+
             data[TableField.METADATA.value] = data[TableField.METADATA.value].apply(fnc)
 
         data = data.astype({TableField.METADATA.value: str})
 
         transposed_data = []
         for _, record in data.iterrows():
-            row = [
-                record[col]
-                for col in update_columns.keys()
-            ]
+            row = [record[col] for col in update_columns.keys()]
             for key_column in key_columns:
                 row.append(record[key_column])
             transposed_data.append(row)
@@ -515,17 +523,13 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         query_str = self.renderer.get_string(query)
         self.raw_query(query_str, transposed_data)
 
-    def delete(
-        self, table_name: str, conditions: List[FilterCondition] = None
-    ):
+    def delete(self, table_name: str, conditions: List[FilterCondition] = None):
         table_name = self._check_table(table_name)
 
-        filter_conditions = self._translate_conditions(conditions)
+        filter_conditions, _ = self._translate_conditions(conditions)
         where_clause = self._construct_where_clause(filter_conditions)
 
-        query = (
-            f"DELETE FROM {table_name} {where_clause}"
-        )
+        query = f"DELETE FROM {table_name} {where_clause}"
         self.raw_query(query)
 
     def drop_table(self, table_name: str, if_exists=True):
@@ -535,7 +539,13 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         table_name = self._check_table(table_name)
         self.raw_query(f"DROP TABLE IF EXISTS {table_name}")
 
-    def create_index(self, table_name: str, column_name: str = "embeddings", index_type: Literal['ivfflat', 'hnsw'] = "hnsw", metric_type: str = None):
+    def create_index(
+        self,
+        table_name: str,
+        column_name: str = "embeddings",
+        index_type: Literal["ivfflat", "hnsw"] = "hnsw",
+        metric_type: str = None,
+    ):
         """
         Create an index on the pgvector table.
         Args:
@@ -547,7 +557,7 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         if metric_type is None:
             metric_type = self.get_metric_type()
         # Check if the index type is supported
-        if index_type not in ['ivfflat', 'hnsw']:
+        if index_type not in ["ivfflat", "hnsw"]:
             raise ValueError("Invalid index type. Supported types are 'ivfflat' and 'hnsw'.")
         table_name = self._check_table(table_name)
         # first we make sure embedding dimension is set
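
Note: the `create_index` body lies outside this hunk, so for reference only, the DDL pgvector expects for such an index looks like the following (standard pgvector syntax; the rendered string is an assumption, not taken from the package):

    table_name, column_name = "my_table", "embeddings"
    index_type, metric_type = "hnsw", "vector_cosine_ops"
    query = f"CREATE INDEX ON {table_name} USING {index_type} ({column_name} {metric_type})"
    # -> CREATE INDEX ON my_table USING hnsw (embeddings vector_cosine_ops)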
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py
@@ -476,7 +476,7 @@ class PostgresHandler(MetaDatabaseHandler):
         config = self._make_connection_args()
         config["autocommit"] = True
 
-        conn = psycopg.connect(connect_timeout=10, **config)
+        conn = psycopg.connect(**config)
 
         # create db trigger
         trigger_name = f"mdb_notify_{table_name}"
mindsdb/integrations/handlers/statsforecast_handler/requirements.txt
@@ -1 +1,2 @@
 statsforecast==1.6.0
+scipy==1.15.3

mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt
@@ -1 +1,2 @@
 statsforecast==1.6.0
+scipy==1.15.3