MindsDB 25.7.2.0__py3-none-any.whl → 25.7.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB has been flagged as potentially problematic; review the changes below and consult the registry's advisory page for details before upgrading.

Files changed (69):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +1 -1
  3. mindsdb/api/a2a/common/server/server.py +16 -6
  4. mindsdb/api/executor/command_executor.py +213 -137
  5. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
  6. mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
  7. mindsdb/api/executor/planner/plan_join.py +3 -0
  8. mindsdb/api/executor/planner/plan_join_ts.py +117 -100
  9. mindsdb/api/executor/planner/query_planner.py +1 -0
  10. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
  11. mindsdb/api/http/initialize.py +16 -43
  12. mindsdb/api/http/namespaces/agents.py +24 -21
  13. mindsdb/api/http/namespaces/chatbots.py +83 -120
  14. mindsdb/api/http/namespaces/file.py +1 -1
  15. mindsdb/api/http/namespaces/jobs.py +38 -60
  16. mindsdb/api/http/namespaces/tree.py +69 -61
  17. mindsdb/api/mcp/start.py +2 -0
  18. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
  19. mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
  20. mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
  21. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
  22. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
  23. mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
  24. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
  25. mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
  26. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -76
  27. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  28. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +16 -3
  29. mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
  30. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  31. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
  32. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
  33. mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
  34. mindsdb/integrations/handlers/salesforce_handler/constants.py +208 -0
  35. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +142 -81
  36. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -4
  37. mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
  38. mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
  39. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
  40. mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
  41. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
  42. mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
  43. mindsdb/integrations/utilities/handler_utils.py +32 -12
  44. mindsdb/interfaces/agents/agents_controller.py +169 -110
  45. mindsdb/interfaces/agents/langchain_agent.py +10 -3
  46. mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -8
  47. mindsdb/interfaces/database/database.py +38 -13
  48. mindsdb/interfaces/database/integrations.py +20 -5
  49. mindsdb/interfaces/database/projects.py +63 -16
  50. mindsdb/interfaces/database/views.py +86 -60
  51. mindsdb/interfaces/jobs/jobs_controller.py +103 -110
  52. mindsdb/interfaces/knowledge_base/controller.py +33 -5
  53. mindsdb/interfaces/knowledge_base/evaluate.py +53 -9
  54. mindsdb/interfaces/knowledge_base/executor.py +24 -0
  55. mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
  56. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
  57. mindsdb/interfaces/query_context/context_controller.py +100 -133
  58. mindsdb/interfaces/skills/skills_controller.py +18 -6
  59. mindsdb/interfaces/storage/db.py +40 -6
  60. mindsdb/interfaces/variables/variables_controller.py +8 -15
  61. mindsdb/utilities/config.py +3 -3
  62. mindsdb/utilities/functions.py +72 -60
  63. mindsdb/utilities/log.py +38 -6
  64. mindsdb/utilities/ps.py +7 -7
  65. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/METADATA +262 -263
  66. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/RECORD +69 -68
  67. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/WHEEL +0 -0
  68. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/licenses/LICENSE +0 -0
  69. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,19 @@ from urllib.parse import urlparse
5
5
 
6
6
  import pandas as pd
7
7
  import psycopg
8
- from mindsdb_sql_parser.ast import Parameter, Identifier, Update, BinaryOperation
8
+ from mindsdb_sql_parser.ast import (
9
+ Parameter,
10
+ Identifier,
11
+ BinaryOperation,
12
+ Tuple as AstTuple,
13
+ Constant,
14
+ Select,
15
+ OrderBy,
16
+ TypeCast,
17
+ Delete,
18
+ Update,
19
+ Function,
20
+ )
9
21
  from pgvector.psycopg import register_vector
10
22
 
11
23
  from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -17,6 +29,7 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
17
29
  VectorStoreHandler,
18
30
  DistanceFunction,
19
31
  TableField,
32
+ FilterOperator,
20
33
  )
21
34
  from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase
22
35
  from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs
@@ -169,31 +182,42 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
169
182
  embedding_condition = None
170
183
 
171
184
  for condition in conditions:
185
+ is_embedding = condition.column == "embeddings"
186
+
172
187
  parts = condition.column.split(".")
173
- key = parts[0]
188
+ key = Identifier(parts[0])
189
+
174
190
  # converts 'col.el1.el2' to col->'el1'->>'el2'
175
191
  if len(parts) > 1:
176
192
  # intermediate elements
177
193
  for el in parts[1:-1]:
178
- key += f" -> '{el}'"
194
+ key = BinaryOperation(op="->", args=[key, Constant(el)])
179
195
 
180
196
  # last element
181
- key += f" ->> '{parts[-1]}'"
197
+ key = BinaryOperation(op="->>", args=[key, Constant(parts[-1])])
182
198
 
183
199
  type_cast = None
184
- if isinstance(condition.value, int):
200
+ value = condition.value
201
+ if (
202
+ isinstance(value, list)
203
+ and len(value) > 0
204
+ and condition.op in (FilterOperator.IN, FilterOperator.NOT_IN)
205
+ ):
206
+ value = condition.value[0]
207
+
208
+ if isinstance(value, int):
185
209
  type_cast = "int"
186
- elif isinstance(condition.value, float):
210
+ elif isinstance(value, float):
187
211
  type_cast = "float"
188
212
  if type_cast is not None:
189
- key = f"({key})::{type_cast}"
213
+ key = TypeCast(type_cast, key)
190
214
 
191
215
  item = {
192
216
  "name": key,
193
217
  "op": condition.op.value,
194
218
  "value": condition.value,
195
219
  }
196
- if key == "embeddings":
220
+ if is_embedding:
197
221
  embedding_condition = item
198
222
  else:
199
223
  filter_conditions.append(item)
@@ -205,64 +229,24 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
205
229
  """
206
230
  Construct where clauses from filter conditions
207
231
  """
208
- if filter_conditions is None:
209
- return ""
210
232
 
211
- where_clauses = []
233
+ where_clause = None
212
234
 
213
235
  for item in filter_conditions:
214
236
  key = item["name"]
215
237
 
216
238
  if item["op"].lower() in ("in", "not in"):
217
- values = list(repr(i) for i in item["value"])
218
- item["value"] = "({})".format(", ".join(values))
239
+ values = [Constant(i) for i in item["value"]]
240
+ value = AstTuple(values)
219
241
  else:
220
- if item["value"] is None:
221
- item["value"] = "null"
222
- else:
223
- item["value"] = repr(item["value"])
224
- where_clauses.append(f"{key} {item['op']} {item['value']}")
225
-
226
- if len(where_clauses) > 1:
227
- return f"WHERE {' AND '.join(where_clauses)}"
228
- elif len(where_clauses) == 1:
229
- return f"WHERE {where_clauses[0]}"
230
- else:
231
- return ""
232
-
233
- @staticmethod
234
- def _construct_where_clause_with_keywords(filter_conditions=None, keyword_query=None, content_column_name=None):
235
- if not keyword_query or not content_column_name:
236
- return PgVectorHandler._construct_where_clause(filter_conditions)
237
-
238
- keyword_query_condition = (
239
- f"""to_tsvector('english', {content_column_name}) @@ websearch_to_tsquery('english', '{keyword_query}')"""
240
- )
241
- if filter_conditions is None:
242
- return ""
243
-
244
- where_clauses = []
245
-
246
- for item in filter_conditions:
247
- key = item["name"]
242
+ value = Constant(item["value"])
243
+ condition = BinaryOperation(op=item["op"], args=[key, value])
248
244
 
249
- if item["op"].lower() in ("in", "not in"):
250
- values = list(repr(i) for i in item["value"])
251
- item["value"] = "({})".format(", ".join(values))
245
+ if where_clause is None:
246
+ where_clause = condition
252
247
  else:
253
- if item["value"] is None:
254
- item["value"] = "null"
255
- else:
256
- item["value"] = repr(item["value"])
257
- where_clauses.append(f"{key} {item['op']} {item['value']}")
258
-
259
- where_clauses.append(keyword_query_condition)
260
- if len(where_clauses) > 1:
261
- return f"WHERE {' AND '.join(where_clauses)}"
262
- elif len(where_clauses) == 1:
263
- return f"WHERE {where_clauses[0]}"
264
- else:
265
- return ""
248
+ where_clause = BinaryOperation(op="AND", args=[where_clause, condition])
249
+ return where_clause
266
250
 
267
251
  @staticmethod
268
252
  def _construct_full_after_from_clause(
@@ -275,9 +259,8 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
275
259
  def _build_keyword_bm25_query(
276
260
  self,
277
261
  table_name: str,
278
- query: str,
262
+ keyword_search_args: KeywordSearchArgs,
279
263
  columns: List[str] = None,
280
- content_column_name: str = "content",
281
264
  conditions: List[FilterCondition] = None,
282
265
  limit: int = None,
283
266
  offset: int = None,
@@ -286,21 +269,44 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
286
269
  columns = ["id", "content", "metadata"]
287
270
 
288
271
  filter_conditions, _ = self._translate_conditions(conditions)
272
+ where_clause = self._construct_where_clause(filter_conditions)
289
273
 
290
- # given filter conditions, construct where clause
291
- where_clause = self._construct_where_clause_with_keywords(filter_conditions, query, content_column_name)
292
-
293
- query = f"""
294
- SELECT
295
- {", ".join(columns)},
296
- ts_rank_cd(to_tsvector('english', {content_column_name}), websearch_to_tsquery('english', '{query}')) as distance
297
- FROM
298
- {table_name}
299
- {where_clause if where_clause else ""}
300
- {f"LIMIT {limit}" if limit else ""}
301
- {f"OFFSET {offset}" if offset else ""};"""
274
+ if keyword_search_args:
275
+ keyword_query_condition = BinaryOperation(
276
+ op="@@",
277
+ args=[
278
+ Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]),
279
+ Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]),
280
+ ],
281
+ )
302
282
 
303
- return query
283
+ if where_clause:
284
+ where_clause = BinaryOperation(op="AND", args=[where_clause, keyword_query_condition])
285
+ else:
286
+ where_clause = keyword_query_condition
287
+
288
+ distance = Function(
289
+ "ts_rank_cd",
290
+ args=[
291
+ Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]),
292
+ Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]),
293
+ ],
294
+ alias=Identifier("distance"),
295
+ )
296
+
297
+ targets = [Identifier(col) for col in columns]
298
+ targets.append(distance)
299
+
300
+ limit_clause = Constant(limit) if limit else None
301
+ offset_clause = Constant(offset) if offset else None
302
+
303
+ return Select(
304
+ targets=targets,
305
+ from_table=Identifier(table_name),
306
+ where=where_clause,
307
+ limit=limit_clause,
308
+ offset=offset_clause,
309
+ )
304
310
 
305
311
  def _build_select_query(
306
312
  self,
@@ -309,12 +315,12 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
309
315
  conditions: List[FilterCondition] = None,
310
316
  limit: int = None,
311
317
  offset: int = None,
312
- ) -> str:
318
+ ) -> Select:
313
319
  """
314
320
  given inputs, build string query
315
321
  """
316
- limit_clause = f"LIMIT {limit}" if limit else ""
317
- offset_clause = f"OFFSET {offset}" if offset else ""
322
+ limit_clause = Constant(limit) if limit else None
323
+ offset_clause = Constant(offset) if offset else None
318
324
 
319
325
  # translate filter conditions to dictionary
320
326
  filter_conditions, embedding_search = self._translate_conditions(conditions)
@@ -335,7 +341,15 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
335
341
  modified_columns = ["id", "content", "embeddings", "metadata"]
336
342
  has_distance = True
337
343
 
338
- targets = ", ".join(modified_columns)
344
+ targets = [Identifier(col) for col in modified_columns]
345
+
346
+ query = Select(
347
+ targets=targets,
348
+ from_table=Identifier(table_name),
349
+ where=where_clause,
350
+ limit=limit_clause,
351
+ offset=offset_clause,
352
+ )
339
353
 
340
354
  if embedding_search:
341
355
  search_vector = embedding_search["value"]
@@ -352,15 +366,18 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
352
366
  if isinstance(search_vector, list):
353
367
  search_vector = f"[{','.join(str(x) for x in search_vector)}]"
354
368
 
369
+ vector_op = BinaryOperation(
370
+ op=self.distance_op,
371
+ args=[Identifier("embeddings"), Constant(search_vector)],
372
+ alias=Identifier("distance"),
373
+ )
355
374
  # Calculate distance as part of the query if needed
356
375
  if has_distance:
357
- targets = f"{targets}, (embeddings {self.distance_op} '{search_vector}') as distance"
376
+ query.targets.append(vector_op)
358
377
 
359
- return f"SELECT {targets} FROM {table_name} {where_clause} ORDER BY embeddings {self.distance_op} '{search_vector}' ASC {limit_clause} {offset_clause} "
378
+ query.order_by = [OrderBy(vector_op, direction="ASC")]
360
379
 
361
- else:
362
- # if filter conditions, return rows that satisfy the conditions
363
- return f"SELECT {targets} FROM {table_name} {where_clause} {limit_clause} {offset_clause}"
380
+ return query
364
381
 
365
382
  def _check_table(self, table_name: str):
366
383
  # Apply namespace for a user
@@ -386,8 +403,8 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
386
403
  columns = ["id", "content", "embeddings", "metadata"]
387
404
 
388
405
  query = self._build_select_query(table_name, columns, conditions, limit, offset)
389
-
390
- result = self.raw_query(query)
406
+ query_str = self.renderer.get_string(query, with_failback=True)
407
+ result = self.raw_query(query_str)
391
408
 
392
409
  # ensure embeddings are returned as string so they can be parsed by mindsdb
393
410
  if "embeddings" in columns:
@@ -408,12 +425,10 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
408
425
 
409
426
  if columns is None:
410
427
  columns = ["id", "content", "embeddings", "metadata"]
411
- content_column_name = keyword_search_args.column
412
- query = self._build_keyword_bm25_query(
413
- table_name, keyword_search_args.query, columns, content_column_name, conditions, limit, offset
414
- )
415
428
 
416
- result = self.raw_query(query)
429
+ query = self._build_keyword_bm25_query(table_name, keyword_search_args, columns, conditions, limit, offset)
430
+ query_str = self.renderer.get_string(query, with_failback=True)
431
+ result = self.raw_query(query_str)
417
432
 
418
433
  # ensure embeddings are returned as string so they can be parsed by mindsdb
419
434
  if "embeddings" in columns:
@@ -622,8 +637,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
622
637
  filter_conditions, _ = self._translate_conditions(conditions)
623
638
  where_clause = self._construct_where_clause(filter_conditions)
624
639
 
625
- query = f"DELETE FROM {table_name} {where_clause}"
626
- self.raw_query(query)
640
+ query = Delete(table=Identifier(table_name), where=where_clause)
641
+ query_str = self.renderer.get_string(query, with_failback=True)
642
+ self.raw_query(query_str)
627
643
 
628
644
  def drop_table(self, table_name: str, if_exists=True):
629
645
  """
@@ -1,5 +1,3 @@
1
- import csv
2
- import io
3
1
  import time
4
2
  import json
5
3
  from typing import Optional, Any
@@ -625,7 +623,7 @@ class PostgresHandler(MetaDatabaseHandler):
625
623
  result = self.native_query(query)
626
624
  return result
627
625
 
628
- def meta_get_column_statistics(self, table_names: Optional[list] = None) -> dict:
626
+ def meta_get_column_statistics(self, table_names: Optional[list] = None) -> Response:
629
627
  """
630
628
  Retrieves column statistics (e.g., most common values, frequencies, null percentage, and distinct value count)
631
629
  for the specified tables or all tables if no list is provided.
@@ -634,54 +632,58 @@ class PostgresHandler(MetaDatabaseHandler):
634
632
  table_names (list): A list of table names for which to retrieve column statistics.
635
633
 
636
634
  Returns:
637
- dict: A dictionary containing the column statistics.
635
+ Response: A response object containing the column statistics.
638
636
  """
639
- query = """
637
+ table_filter = ""
638
+ if table_names is not None and len(table_names) > 0:
639
+ quoted_names = [f"'{t}'" for t in table_names]
640
+ table_filter = f" AND ps.tablename IN ({','.join(quoted_names)})"
641
+
642
+ query = (
643
+ """
640
644
  SELECT
641
- ps.attname AS column_name,
642
- ps.tablename AS table_name,
643
- ps.most_common_vals AS most_common_values,
644
- ps.most_common_freqs::text AS most_common_frequencies,
645
- ps.null_frac * 100 AS null_percentage,
646
- ps.n_distinct AS distinct_values_count,
647
- ps.histogram_bounds AS histogram_bounds
645
+ ps.tablename AS TABLE_NAME,
646
+ ps.attname AS COLUMN_NAME,
647
+ ROUND(ps.null_frac::numeric * 100, 2) AS NULL_PERCENTAGE,
648
+ CASE
649
+ WHEN ps.n_distinct < 0 THEN NULL
650
+ ELSE ps.n_distinct::bigint
651
+ END AS DISTINCT_VALUES_COUNT,
652
+ ps.most_common_vals AS MOST_COMMON_VALUES,
653
+ ps.most_common_freqs AS MOST_COMMON_FREQUENCIES,
654
+ ps.histogram_bounds
648
655
  FROM pg_stats ps
649
656
  WHERE ps.schemaname = current_schema()
650
657
  AND ps.tablename NOT LIKE 'pg_%'
651
658
  AND ps.tablename NOT LIKE 'sql_%'
652
659
  """
653
-
654
- if table_names is not None and len(table_names) > 0:
655
- table_names = [f"'{t}'" for t in table_names]
656
- query += f" AND ps.tablename IN ({','.join(table_names)})"
660
+ + table_filter
661
+ + """
662
+ ORDER BY ps.tablename, ps.attname
663
+ """
664
+ )
657
665
 
658
666
  result = self.native_query(query)
659
- df = result.data_frame
660
667
 
661
- def parse_pg_array_string(x):
662
- try:
663
- return (
664
- [item.strip(" ,") for row in csv.reader(io.StringIO(x.strip("{}"))) for item in row if item.strip()]
665
- if x
666
- else []
667
- )
668
- except IndexError:
669
- logger.error(f"Error parsing PostgreSQL array string: {x}")
670
- return []
671
-
672
- # Convert most_common_values and most_common_frequencies from string representation to lists.
673
- df["most_common_values"] = df["most_common_values"].apply(lambda x: parse_pg_array_string(x))
674
- df["most_common_frequencies"] = df["most_common_frequencies"].apply(lambda x: parse_pg_array_string(x))
675
-
676
- # Get the minimum and maximum values from the histogram bounds.
677
- df["minimum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[0] if x else None)
678
- df["maximum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[-1] if x else None)
679
-
680
- # Handle cases where distinct_values_count is negative (indicating an approximation).
681
- df["distinct_values_count"] = df["distinct_values_count"].apply(lambda x: x if x >= 0 else None)
668
+ if result.type == RESPONSE_TYPE.TABLE and result.data_frame is not None:
669
+ df = result.data_frame
682
670
 
683
- result.data_frame = df.drop(columns=["histogram_bounds"])
671
+ # Extract min/max from histogram bounds
672
+ def extract_min_max(histogram_str):
673
+ if histogram_str and str(histogram_str) != "nan":
674
+ clean = str(histogram_str).strip("{}")
675
+ if clean:
676
+ values = clean.split(",")
677
+ min_val = values[0].strip(" \"'") if values else None
678
+ max_val = values[-1].strip(" \"'") if values else None
679
+ return min_val, max_val
680
+ return None, None
684
681
 
682
+ min_max_values = df["histogram_bounds"].apply(extract_min_max)
683
+ df["MINIMUM_VALUE"] = min_max_values.apply(lambda x: x[0])
684
+ df["MAXIMUM_VALUE"] = min_max_values.apply(lambda x: x[1])
685
+
686
+ result.data_frame = df.drop(columns=["histogram_bounds"])
685
687
  return result
686
688
 
687
689
  def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response: