MindsDB 25.7.2.0__py3-none-any.whl → 25.7.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
  __title__ = "MindsDB"
  __package_name__ = "mindsdb"
- __version__ = "25.7.2.0"
+ __version__ = "25.7.3.0"
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
  __email__ = "jorge@mindsdb.com"
  __author__ = "MindsDB Inc"
mindsdb/__main__.py CHANGED
@@ -375,7 +375,7 @@ if __name__ == "__main__":
  apis = os.getenv("MINDSDB_APIS") or config.cmd_args.api

  if apis is None: # If "--api" option is not specified, start the default APIs
- api_arr = [TrunkProcessEnum.HTTP, TrunkProcessEnum.MYSQL]
+ api_arr = [TrunkProcessEnum.HTTP, TrunkProcessEnum.MYSQL, TrunkProcessEnum.MCP, TrunkProcessEnum.A2A]
  elif apis == "": # If "--api=" (blank) is specified, don't start any APIs
  api_arr = []
  else: # The user has provided a list of APIs to start
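Note: the default API set now also starts the MCP and A2A servers. A minimal sketch of how the hunk above resolves the list; only the env-var/flag precedence and the three branches come from the diff, while the comma-separated parsing of a user-supplied value is an assumption:

import os

DEFAULT_APIS = ["http", "mysql", "mcp", "a2a"]  # new defaults in 25.7.3.0

def resolve_apis(cmd_arg=None):
    apis = os.getenv("MINDSDB_APIS") or cmd_arg
    if apis is None:   # "--api" not given: start the default APIs
        return DEFAULT_APIS
    if apis == "":     # "--api=" (blank): start no APIs
        return []
    return [a.strip() for a in apis.split(",")]  # assumed comma-separated list

print(resolve_apis())        # ['http', 'mysql', 'mcp', 'a2a'] (assuming MINDSDB_APIS is unset)
print(resolve_apis("http"))  # ['http']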
@@ -84,7 +84,7 @@ from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import (
  TYPES,
  )

- from .exceptions import (
+ from mindsdb.api.executor.exceptions import (
  ExecutorException,
  BadDbError,
  NotSupportedYet,
@@ -1221,9 +1221,11 @@ class ExecuteCommands:
  db_name = database_name

  dn = self.session.datahub[db_name]
+ if dn is None:
+ raise ExecutorException(f"Cannot delete a table from database '{db_name}': the database does not exist")
+
  if db_name is not None:
  dn.drop_table(table, if_exists=statement.if_exists)
-
  elif db_name in self.session.database_controller.get_dict(filter_type="project"):
  # TODO do we need feature: delete object from project via drop table?

@@ -1428,6 +1430,9 @@ class ExecuteCommands:
  provider=provider,
  params=statement.params,
  )
+ except EntityExistsError as e:
+ if statement.if_not_exists is not True:
+ raise ExecutorException(str(e))
  except ValueError as e:
  # Project does not exist or agent already exists.
  raise ExecutorException(str(e))
@@ -164,7 +164,11 @@ class IntegrationDataNode(DataNode):
  df = result_set.to_df()

  result: HandlerResponse = self.integration_handler.insert(table_name.parts[-1], df)
- return DataHubResponse(affected_rows=result.affected_rows)
+ if result is not None:
+ affected_rows = result.affected_rows
+ else:
+ affected_rows = None
+ return DataHubResponse(affected_rows=affected_rows)

  insert_columns = [Identifier(parts=[x.alias]) for x in result_set.columns]

@@ -54,7 +54,7 @@ def create_agent(project_name, name, agent):
  name=name, project_name=project_name, model_name=model_name, skills=skills, provider=provider, params=params
  )
  return created_agent.as_dict(), HTTPStatus.CREATED
- except ValueError:
+ except (ValueError, EntityExistsError):
  # Model or skill doesn't exist.
  return http_error(
  HTTPStatus.NOT_FOUND,
@@ -2,7 +2,8 @@ import ast
  from typing import Dict, Optional, List


- from litellm import completion, batch_completion, embedding, acompletion
+ from litellm import completion, batch_completion, embedding, acompletion, supports_response_schema
+
  import pandas as pd

  from mindsdb.integrations.libs.base import BaseMLEngine
@@ -58,6 +59,15 @@ class LiteLLMHandler(BaseMLEngine):
  @classmethod
  def completion(cls, provider: str, model: str, messages: List[dict], args: dict):
  model, args = cls.prepare_arguments(provider, model, args)
+ json_output = args.pop("json_output", False)
+
+ supports_json_output = supports_response_schema(model=model, custom_llm_provider=provider)
+
+ if json_output and supports_json_output:
+ args["response_format"] = {"type": "json_object"}
+ else:
+ args["response_format"] = None
+
  return completion(model=model, messages=messages, stream=False, **args)

  def create(
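Note: the hunk above gates JSON mode on litellm's capability check. A minimal standalone sketch of the same pattern; supports_response_schema and completion are the litellm functions imported in the diff, while the wrapper name and the example model are placeholders:

from litellm import completion, supports_response_schema

def complete(provider, model, messages, **args):
    # Only request JSON mode when the provider/model pair advertises schema support.
    json_output = args.pop("json_output", False)
    if json_output and supports_response_schema(model=model, custom_llm_provider=provider):
        args["response_format"] = {"type": "json_object"}
    else:
        args["response_format"] = None
    return completion(model=model, messages=messages, stream=False, **args)

# Hypothetical usage (requires an OPENAI_API_KEY in the environment):
# complete("openai", "gpt-4o-mini", [{"role": "user", "content": "Reply in JSON"}], json_output=True)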
@@ -1,4 +1,4 @@
- llama-index==0.12.28
+ llama-index==0.12.41
  pydantic-settings >= 2.1.0
  llama-index-readers-web
  llama-index-embeddings-openai
@@ -7,6 +7,7 @@ from duckdb import HTTPException
  from mindsdb_sql_parser import parse_sql
  import pandas as pd
  from typing import Text, Dict, Optional
+ from botocore.client import Config
  from botocore.exceptions import ClientError

  from mindsdb_sql_parser.ast.base import ASTNode
@@ -16,7 +17,7 @@ from mindsdb.utilities import log
  from mindsdb.integrations.libs.response import (
  HandlerStatusResponse as StatusResponse,
  HandlerResponse as Response,
- RESPONSE_TYPE
+ RESPONSE_TYPE,
  )

  from mindsdb.integrations.libs.api_handler import APIResource, APIHandler
@@ -26,16 +27,12 @@ logger = log.getLogger(__name__)


  class ListFilesTable(APIResource):
-
- def list(self,
- targets: List[str] = None,
- conditions: List[FilterCondition] = None,
- limit: int = None,
- *args, **kwargs) -> pd.DataFrame:
-
+ def list(
+ self, targets: List[str] = None, conditions: List[FilterCondition] = None, limit: int = None, *args, **kwargs
+ ) -> pd.DataFrame:
  buckets = None
  for condition in conditions:
- if condition.column == 'bucket':
+ if condition.column == "bucket":
  if condition.op == FilterOperator.IN:
  buckets = condition.value
  elif condition.op == FilterOperator.EQUAL:
@@ -44,25 +41,27 @@ class ListFilesTable(APIResource):

  data = []
  for obj in self.handler.get_objects(limit=limit, buckets=buckets):
- path = obj['Key']
- path = path.replace('`', '')
+ path = obj["Key"]
+ path = path.replace("`", "")
  item = {
- 'path': path,
- 'bucket': obj['Bucket'],
- 'name': path[path.rfind('/') + 1:],
- 'extension': path[path.rfind('.') + 1:]
+ "path": path,
+ "bucket": obj["Bucket"],
+ "name": path[path.rfind("/") + 1 :],
+ "extension": path[path.rfind(".") + 1 :],
  }

+ if targets and "public_url" in targets:
+ item["public_url"] = self.handler.generate_sas_url(path, obj["Bucket"])
+
  data.append(item)

  return pd.DataFrame(data=data, columns=self.get_columns())

  def get_columns(self) -> List[str]:
- return ["path", "name", "extension", "bucket", "content"]
+ return ["path", "name", "extension", "bucket", "content", "public_url"]


  class FileTable(APIResource):
-
  def list(self, targets: List[str] = None, table_name=None, *args, **kwargs) -> pd.DataFrame:
  return self.handler.read_as_table(table_name)

@@ -76,9 +75,9 @@ class S3Handler(APIHandler):
  This handler handles connection and execution of the SQL statements on AWS S3.
  """

- name = 's3'
+ name = "s3"
  # TODO: Can other file formats be supported?
- supported_file_formats = ['csv', 'tsv', 'json', 'parquet']
+ supported_file_formats = ["csv", "tsv", "json", "parquet"]

  def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs):
  """
@@ -96,7 +95,7 @@
  self.connection = None
  self.is_connected = False
  self.thread_safe = True
- self.bucket = self.connection_data.get('bucket')
+ self.bucket = self.connection_data.get("bucket")
  self._regions = {}

  self._files_table = ListFilesTable(self)
@@ -119,8 +118,8 @@
  return self.connection

  # Validate mandatory parameters.
- if not all(key in self.connection_data for key in ['aws_access_key_id', 'aws_secret_access_key']):
- raise ValueError('Required parameters (aws_access_key_id, aws_secret_access_key) must be provided.')
+ if not all(key in self.connection_data for key in ["aws_access_key_id", "aws_secret_access_key"]):
+ raise ValueError("Required parameters (aws_access_key_id, aws_secret_access_key) must be provided.")

  # Connect to S3 and configure mandatory credentials.
  self.connection = self._connect_boto3()
@@ -152,13 +151,13 @@
  duckdb_conn.execute(f"SET s3_secret_access_key='{self.connection_data['aws_secret_access_key']}'")

  # Configure optional parameters.
- if 'aws_session_token' in self.connection_data:
+ if "aws_session_token" in self.connection_data:
  duckdb_conn.execute(f"SET s3_session_token='{self.connection_data['aws_session_token']}'")

  # detect region for bucket
  if bucket not in self._regions:
  client = self.connect()
- self._regions[bucket] = client.get_bucket_location(Bucket=bucket)['LocationConstraint']
+ self._regions[bucket] = client.get_bucket_location(Bucket=bucket)["LocationConstraint"]

  region = self._regions[bucket]
  duckdb_conn.execute(f"SET s3_region='{region}'")
@@ -177,15 +176,17 @@
  """
  # Configure mandatory credentials.
  config = {
- 'aws_access_key_id': self.connection_data['aws_access_key_id'],
- 'aws_secret_access_key': self.connection_data['aws_secret_access_key']
+ "aws_access_key_id": self.connection_data["aws_access_key_id"],
+ "aws_secret_access_key": self.connection_data["aws_secret_access_key"],
  }

  # Configure optional parameters.
- if 'aws_session_token' in self.connection_data:
- config['aws_session_token'] = self.connection_data['aws_session_token']
+ optional_parameters = ["region_name", "aws_session_token"]
+ for parameter in optional_parameters:
+ if parameter in self.connection_data:
+ config[parameter] = self.connection_data[parameter]

- client = boto3.client('s3', **config)
+ client = boto3.client("s3", **config, config=Config(signature_version="s3v4"))

  # check connection
  if self.bucket is not None:
@@ -219,7 +220,7 @@ class S3Handler(APIHandler):
  self._connect_boto3()
  response.success = True
  except (ClientError, ValueError) as e:
- logger.error(f'Error connecting to S3 with the given credentials, {e}!')
+ logger.error(f"Error connecting to S3 with the given credentials, {e}!")
  response.error_message = str(e)

  if response.success and need_to_close:
@@ -235,8 +236,8 @@
  return self.bucket, key

  # get bucket from first part of the key
- ar = key.split('/')
- return ar[0], '/'.join(ar[1:])
+ ar = key.split("/")
+ return ar[0], "/".join(ar[1:])

  def read_as_table(self, key) -> pd.DataFrame:
  """
@@ -245,7 +246,6 @@
  bucket, key = self._get_bucket(key)

  with self._connect_duckdb(bucket) as connection:
-
  cursor = connection.execute(f"SELECT * FROM 's3://{bucket}/{key}'")

  return cursor.fetchdf()
@@ -259,7 +259,7 @@
  client = self.connect()

  obj = client.get_object(Bucket=bucket, Key=key)
- content = obj['Body'].read()
+ content = obj["Body"].read()
  return content

  def add_data_to_table(self, key, df) -> None:
@@ -277,7 +277,7 @@
  client = self.connect()
  client.head_object(Bucket=bucket, Key=key)
  except ClientError as e:
- logger.error(f'Error querying the file {key} in the bucket {bucket}, {e}!')
+ logger.error(f"Error querying the file {key} in the bucket {bucket}, {e}!")
  raise e

  with self._connect_duckdb(bucket) as connection:
@@ -309,31 +309,28 @@ class S3Handler(APIHandler):
  if isinstance(query, Select):
  table_name = query.from_table.parts[-1]

- if table_name == 'files':
+ if table_name == "files":
  table = self._files_table
  df = table.select(query)

  # add content
  has_content = False
  for target in query.targets:
- if isinstance(target, Identifier) and target.parts[-1].lower() == 'content':
+ if isinstance(target, Identifier) and target.parts[-1].lower() == "content":
  has_content = True
  break
  if has_content:
- df['content'] = df['path'].apply(self._read_as_content)
+ df["content"] = df["path"].apply(self._read_as_content)
  else:
- extension = table_name.split('.')[-1]
+ extension = table_name.split(".")[-1]
  if extension not in self.supported_file_formats:
- logger.error(f'The file format {extension} is not supported!')
- raise ValueError(f'The file format {extension} is not supported!')
+ logger.error(f"The file format {extension} is not supported!")
+ raise ValueError(f"The file format {extension} is not supported!")

  table = FileTable(self, table_name=table_name)
  df = table.select(query)

- response = Response(
- RESPONSE_TYPE.TABLE,
- data_frame=df
- )
+ response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
  elif isinstance(query, Insert):
  table_name = query.table.parts[-1]
  table = FileTable(self, table_name=table_name)
@@ -364,7 +361,7 @@ class S3Handler(APIHandler):
  scan_buckets = [self.bucket]
  else:
  add_bucket_to_name = True
- scan_buckets = [b['Name'] for b in client.list_buckets()['Buckets']]
+ scan_buckets = [b["Name"] for b in client.list_buckets()["Buckets"]]

  objects = []
  for bucket in scan_buckets:
@@ -372,23 +369,38 @@
  continue

  resp = client.list_objects_v2(Bucket=bucket)
- if 'Contents' not in resp:
+ if "Contents" not in resp:
  continue

- for obj in resp['Contents']:
- if obj.get('StorageClass', 'STANDARD') != 'STANDARD':
+ for obj in resp["Contents"]:
+ if obj.get("StorageClass", "STANDARD") != "STANDARD":
  continue

- obj['Bucket'] = bucket
+ obj["Bucket"] = bucket
  if add_bucket_to_name:
  # bucket is part of the name
- obj['Key'] = f'{bucket}/{obj["Key"]}'
+ obj["Key"] = f"{bucket}/{obj['Key']}"
  objects.append(obj)
  if limit is not None and len(objects) >= limit:
  break

  return objects

+ def generate_sas_url(self, key: str, bucket: str) -> str:
+ """
+ Generates a pre-signed URL for accessing an object in the S3 bucket.
+
+ Args:
+ key (str): The key (path) of the object in the S3 bucket.
+ bucket (str): The name of the S3 bucket.
+
+ Returns:
+ str: The pre-signed URL for accessing the object.
+ """
+ client = self.connect()
+ url = client.generate_presigned_url("get_object", Params={"Bucket": bucket, "Key": key}, ExpiresIn=3600)
+ return url
+
  def get_tables(self) -> Response:
  """
  Retrieves a list of tables (objects) in the S3 bucket.
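Note: a hedged usage sketch of what the new public_url column ultimately calls: generate_presigned_url with a one-hour expiry, as in generate_sas_url above, combined with Config(signature_version="s3v4") from the earlier _connect_boto3 hunk (SigV4 is required for presigned URLs in many regions). Bucket, key, and credentials below are placeholders:

import boto3
from botocore.client import Config

# Placeholders; any readable bucket/object and valid AWS credentials would do.
client = boto3.client("s3", config=Config(signature_version="s3v4"))
url = client.generate_presigned_url(
    "get_object",
    Params={"Bucket": "my-bucket", "Key": "reports/2025/sales.csv"},
    ExpiresIn=3600,  # one hour, matching the handler's default
)
print(url)

In the handler, this path is triggered by selecting the public_url target from the files table.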
@@ -402,21 +414,13 @@ class S3Handler(APIHandler):
  # Get only the supported file formats.
  # Wrap the object names with backticks to prevent SQL syntax errors.
  supported_names = [
- f"`{obj['Key']}`"
- for obj in self.get_objects()
- if obj['Key'].split('.')[-1] in self.supported_file_formats
+ f"`{obj['Key']}`" for obj in self.get_objects() if obj["Key"].split(".")[-1] in self.supported_file_formats
  ]

  # virtual table with list of files
- supported_names.insert(0, 'files')
+ supported_names.insert(0, "files")

- response = Response(
- RESPONSE_TYPE.TABLE,
- data_frame=pd.DataFrame(
- supported_names,
- columns=['table_name']
- )
- )
+ response = Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(supported_names, columns=["table_name"]))

  return response

@@ -433,11 +437,7 @@
  Returns:
  Response: A response object containing the column details, formatted as per the `Response` class.
  """
- query = Select(
- targets=[Star()],
- from_table=Identifier(parts=[table_name]),
- limit=Constant(1)
- )
+ query = Select(targets=[Star()], from_table=Identifier(parts=[table_name]), limit=Constant(1))

  result = self.query(query)

@@ -445,10 +445,12 @@
  RESPONSE_TYPE.TABLE,
  data_frame=pd.DataFrame(
  {
- 'column_name': result.data_frame.columns,
- 'data_type': [data_type if data_type != 'object' else 'string' for data_type in result.data_frame.dtypes]
+ "column_name": result.data_frame.columns,
+ "data_type": [
+ data_type if data_type != "object" else "string" for data_type in result.data_frame.dtypes
+ ],
  }
- )
+ ),
  )

  return response
@@ -72,7 +72,7 @@ class SalesforceHandler(MetaAPIHandler):

  resource_tables = self._get_resource_names()
  for resource_name in resource_tables:
- table_class = create_table_class(resource_name)
+ table_class = create_table_class(resource_name.lower())
  self._register_table(resource_name, table_class(self))

  return self.connection
@@ -164,9 +164,18 @@ def create_table_class(resource_name: Text) -> MetaAPIResource:
  """
  client = self.handler.connect()

- resource_metadata = next(
- (resource for resource in main_metadata if resource["name"].lower() == resource_name),
- )
+ try:
+ resource_metadata = next(
+ (resource for resource in main_metadata if resource["name"].lower() == resource_name),
+ )
+ except Exception as e:
+ logger.warning(f"Failed to get resource metadata for {resource_name}: {e}")
+ return {
+ "table_name": table_name,
+ "table_type": "BASE TABLE",
+ "table_description": "",
+ "row_count": None,
+ }

  # Get row count if Id column is aggregatable.
  row_count = None
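Note: the try/except above exists because next() with no default raises StopIteration when no resource in the metadata matches; StopIteration is an Exception subclass, so the broad handler catches the miss and falls back to a minimal table description. A small illustrative sketch with made-up metadata:

main_metadata = [{"name": "Account"}, {"name": "Contact"}]
resource_name = "lead"  # not present in the metadata

try:
    resource_metadata = next(
        resource for resource in main_metadata if resource["name"].lower() == resource_name
    )
except Exception as e:  # StopIteration lands here when nothing matches
    resource_metadata = None
    print(f"Failed to get resource metadata for {resource_name}: {e}")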