MindsDB 25.7.2.0__py3-none-any.whl → 25.7.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. See the registry's advisory for this release for more details.

Files changed (69)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +1 -1
  3. mindsdb/api/a2a/common/server/server.py +16 -6
  4. mindsdb/api/executor/command_executor.py +213 -137
  5. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
  6. mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
  7. mindsdb/api/executor/planner/plan_join.py +3 -0
  8. mindsdb/api/executor/planner/plan_join_ts.py +117 -100
  9. mindsdb/api/executor/planner/query_planner.py +1 -0
  10. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
  11. mindsdb/api/http/initialize.py +16 -43
  12. mindsdb/api/http/namespaces/agents.py +24 -21
  13. mindsdb/api/http/namespaces/chatbots.py +83 -120
  14. mindsdb/api/http/namespaces/file.py +1 -1
  15. mindsdb/api/http/namespaces/jobs.py +38 -60
  16. mindsdb/api/http/namespaces/tree.py +69 -61
  17. mindsdb/api/mcp/start.py +2 -0
  18. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
  19. mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
  20. mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
  21. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
  22. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
  23. mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
  24. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
  25. mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
  26. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -76
  27. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  28. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +16 -3
  29. mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
  30. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  31. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
  32. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
  33. mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
  34. mindsdb/integrations/handlers/salesforce_handler/constants.py +208 -0
  35. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +142 -81
  36. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -4
  37. mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
  38. mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
  39. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
  40. mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
  41. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
  42. mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
  43. mindsdb/integrations/utilities/handler_utils.py +32 -12
  44. mindsdb/interfaces/agents/agents_controller.py +169 -110
  45. mindsdb/interfaces/agents/langchain_agent.py +10 -3
  46. mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -8
  47. mindsdb/interfaces/database/database.py +38 -13
  48. mindsdb/interfaces/database/integrations.py +20 -5
  49. mindsdb/interfaces/database/projects.py +63 -16
  50. mindsdb/interfaces/database/views.py +86 -60
  51. mindsdb/interfaces/jobs/jobs_controller.py +103 -110
  52. mindsdb/interfaces/knowledge_base/controller.py +33 -5
  53. mindsdb/interfaces/knowledge_base/evaluate.py +53 -9
  54. mindsdb/interfaces/knowledge_base/executor.py +24 -0
  55. mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
  56. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
  57. mindsdb/interfaces/query_context/context_controller.py +100 -133
  58. mindsdb/interfaces/skills/skills_controller.py +18 -6
  59. mindsdb/interfaces/storage/db.py +40 -6
  60. mindsdb/interfaces/variables/variables_controller.py +8 -15
  61. mindsdb/utilities/config.py +3 -3
  62. mindsdb/utilities/functions.py +72 -60
  63. mindsdb/utilities/log.py +38 -6
  64. mindsdb/utilities/ps.py +7 -7
  65. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/METADATA +262 -263
  66. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/RECORD +69 -68
  67. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/WHEEL +0 -0
  68. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/licenses/LICENSE +0 -0
  69. {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from duckdb import HTTPException
7
7
  from mindsdb_sql_parser import parse_sql
8
8
  import pandas as pd
9
9
  from typing import Text, Dict, Optional
10
+ from botocore.client import Config
10
11
  from botocore.exceptions import ClientError
11
12
 
12
13
  from mindsdb_sql_parser.ast.base import ASTNode
@@ -16,7 +17,7 @@ from mindsdb.utilities import log
16
17
  from mindsdb.integrations.libs.response import (
17
18
  HandlerStatusResponse as StatusResponse,
18
19
  HandlerResponse as Response,
19
- RESPONSE_TYPE
20
+ RESPONSE_TYPE,
20
21
  )
21
22
 
22
23
  from mindsdb.integrations.libs.api_handler import APIResource, APIHandler
@@ -26,16 +27,12 @@ logger = log.getLogger(__name__)
26
27
 
27
28
 
28
29
  class ListFilesTable(APIResource):
29
-
30
- def list(self,
31
- targets: List[str] = None,
32
- conditions: List[FilterCondition] = None,
33
- limit: int = None,
34
- *args, **kwargs) -> pd.DataFrame:
35
-
30
+ def list(
31
+ self, targets: List[str] = None, conditions: List[FilterCondition] = None, limit: int = None, *args, **kwargs
32
+ ) -> pd.DataFrame:
36
33
  buckets = None
37
34
  for condition in conditions:
38
- if condition.column == 'bucket':
35
+ if condition.column == "bucket":
39
36
  if condition.op == FilterOperator.IN:
40
37
  buckets = condition.value
41
38
  elif condition.op == FilterOperator.EQUAL:
@@ -44,25 +41,27 @@ class ListFilesTable(APIResource):
44
41
 
45
42
  data = []
46
43
  for obj in self.handler.get_objects(limit=limit, buckets=buckets):
47
- path = obj['Key']
48
- path = path.replace('`', '')
44
+ path = obj["Key"]
45
+ path = path.replace("`", "")
49
46
  item = {
50
- 'path': path,
51
- 'bucket': obj['Bucket'],
52
- 'name': path[path.rfind('/') + 1:],
53
- 'extension': path[path.rfind('.') + 1:]
47
+ "path": path,
48
+ "bucket": obj["Bucket"],
49
+ "name": path[path.rfind("/") + 1 :],
50
+ "extension": path[path.rfind(".") + 1 :],
54
51
  }
55
52
 
53
+ if targets and "public_url" in targets:
54
+ item["public_url"] = self.handler.generate_sas_url(path, obj["Bucket"])
55
+
56
56
  data.append(item)
57
57
 
58
58
  return pd.DataFrame(data=data, columns=self.get_columns())
59
59
 
60
60
    def get_columns(self) -> List[str]:
        """Return the column names exposed by the virtual ``files`` table.

        Note: ``content`` and ``public_url`` are computed lazily — ``content``
        is filled in by the handler only when selected, and ``public_url`` is a
        pre-signed URL generated only when it appears in ``targets``.
        """
        return ["path", "name", "extension", "bucket", "content", "public_url"]
62
62
 
63
63
 
64
64
  class FileTable(APIResource):
65
-
66
65
    def list(self, targets: List[str] = None, table_name=None, *args, **kwargs) -> pd.DataFrame:
        """Read the S3 object named by ``table_name`` and return it as a DataFrame.

        Delegates to the handler's ``read_as_table``, which resolves the
        bucket/key pair and loads the file through DuckDB.
        """
        return self.handler.read_as_table(table_name)
68
67
 
@@ -76,9 +75,9 @@ class S3Handler(APIHandler):
76
75
  This handler handles connection and execution of the SQL statements on AWS S3.
77
76
  """
78
77
 
79
- name = 's3'
78
+ name = "s3"
80
79
  # TODO: Can other file formats be supported?
81
- supported_file_formats = ['csv', 'tsv', 'json', 'parquet']
80
+ supported_file_formats = ["csv", "tsv", "json", "parquet"]
82
81
 
83
82
  def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs):
84
83
  """
@@ -96,7 +95,7 @@ class S3Handler(APIHandler):
96
95
  self.connection = None
97
96
  self.is_connected = False
98
97
  self.thread_safe = True
99
- self.bucket = self.connection_data.get('bucket')
98
+ self.bucket = self.connection_data.get("bucket")
100
99
  self._regions = {}
101
100
 
102
101
  self._files_table = ListFilesTable(self)
@@ -119,8 +118,8 @@ class S3Handler(APIHandler):
119
118
  return self.connection
120
119
 
121
120
  # Validate mandatory parameters.
122
- if not all(key in self.connection_data for key in ['aws_access_key_id', 'aws_secret_access_key']):
123
- raise ValueError('Required parameters (aws_access_key_id, aws_secret_access_key) must be provided.')
121
+ if not all(key in self.connection_data for key in ["aws_access_key_id", "aws_secret_access_key"]):
122
+ raise ValueError("Required parameters (aws_access_key_id, aws_secret_access_key) must be provided.")
124
123
 
125
124
  # Connect to S3 and configure mandatory credentials.
126
125
  self.connection = self._connect_boto3()
@@ -152,13 +151,13 @@ class S3Handler(APIHandler):
152
151
  duckdb_conn.execute(f"SET s3_secret_access_key='{self.connection_data['aws_secret_access_key']}'")
153
152
 
154
153
  # Configure optional parameters.
155
- if 'aws_session_token' in self.connection_data:
154
+ if "aws_session_token" in self.connection_data:
156
155
  duckdb_conn.execute(f"SET s3_session_token='{self.connection_data['aws_session_token']}'")
157
156
 
158
157
  # detect region for bucket
159
158
  if bucket not in self._regions:
160
159
  client = self.connect()
161
- self._regions[bucket] = client.get_bucket_location(Bucket=bucket)['LocationConstraint']
160
+ self._regions[bucket] = client.get_bucket_location(Bucket=bucket)["LocationConstraint"]
162
161
 
163
162
  region = self._regions[bucket]
164
163
  duckdb_conn.execute(f"SET s3_region='{region}'")
@@ -177,15 +176,17 @@ class S3Handler(APIHandler):
177
176
  """
178
177
  # Configure mandatory credentials.
179
178
  config = {
180
- 'aws_access_key_id': self.connection_data['aws_access_key_id'],
181
- 'aws_secret_access_key': self.connection_data['aws_secret_access_key']
179
+ "aws_access_key_id": self.connection_data["aws_access_key_id"],
180
+ "aws_secret_access_key": self.connection_data["aws_secret_access_key"],
182
181
  }
183
182
 
184
183
  # Configure optional parameters.
185
- if 'aws_session_token' in self.connection_data:
186
- config['aws_session_token'] = self.connection_data['aws_session_token']
184
+ optional_parameters = ["region_name", "aws_session_token"]
185
+ for parameter in optional_parameters:
186
+ if parameter in self.connection_data:
187
+ config[parameter] = self.connection_data[parameter]
187
188
 
188
- client = boto3.client('s3', **config)
189
+ client = boto3.client("s3", **config, config=Config(signature_version="s3v4"))
189
190
 
190
191
  # check connection
191
192
  if self.bucket is not None:
@@ -219,7 +220,7 @@ class S3Handler(APIHandler):
219
220
  self._connect_boto3()
220
221
  response.success = True
221
222
  except (ClientError, ValueError) as e:
222
- logger.error(f'Error connecting to S3 with the given credentials, {e}!')
223
+ logger.error(f"Error connecting to S3 with the given credentials, {e}!")
223
224
  response.error_message = str(e)
224
225
 
225
226
  if response.success and need_to_close:
@@ -235,8 +236,8 @@ class S3Handler(APIHandler):
235
236
  return self.bucket, key
236
237
 
237
238
  # get bucket from first part of the key
238
- ar = key.split('/')
239
- return ar[0], '/'.join(ar[1:])
239
+ ar = key.split("/")
240
+ return ar[0], "/".join(ar[1:])
240
241
 
241
242
  def read_as_table(self, key) -> pd.DataFrame:
242
243
  """
@@ -245,7 +246,6 @@ class S3Handler(APIHandler):
245
246
  bucket, key = self._get_bucket(key)
246
247
 
247
248
  with self._connect_duckdb(bucket) as connection:
248
-
249
249
  cursor = connection.execute(f"SELECT * FROM 's3://{bucket}/{key}'")
250
250
 
251
251
  return cursor.fetchdf()
@@ -259,7 +259,7 @@ class S3Handler(APIHandler):
259
259
  client = self.connect()
260
260
 
261
261
  obj = client.get_object(Bucket=bucket, Key=key)
262
- content = obj['Body'].read()
262
+ content = obj["Body"].read()
263
263
  return content
264
264
 
265
265
  def add_data_to_table(self, key, df) -> None:
@@ -277,7 +277,7 @@ class S3Handler(APIHandler):
277
277
  client = self.connect()
278
278
  client.head_object(Bucket=bucket, Key=key)
279
279
  except ClientError as e:
280
- logger.error(f'Error querying the file {key} in the bucket {bucket}, {e}!')
280
+ logger.error(f"Error querying the file {key} in the bucket {bucket}, {e}!")
281
281
  raise e
282
282
 
283
283
  with self._connect_duckdb(bucket) as connection:
@@ -309,31 +309,28 @@ class S3Handler(APIHandler):
309
309
  if isinstance(query, Select):
310
310
  table_name = query.from_table.parts[-1]
311
311
 
312
- if table_name == 'files':
312
+ if table_name == "files":
313
313
  table = self._files_table
314
314
  df = table.select(query)
315
315
 
316
316
  # add content
317
317
  has_content = False
318
318
  for target in query.targets:
319
- if isinstance(target, Identifier) and target.parts[-1].lower() == 'content':
319
+ if isinstance(target, Identifier) and target.parts[-1].lower() == "content":
320
320
  has_content = True
321
321
  break
322
322
  if has_content:
323
- df['content'] = df['path'].apply(self._read_as_content)
323
+ df["content"] = df["path"].apply(self._read_as_content)
324
324
  else:
325
- extension = table_name.split('.')[-1]
325
+ extension = table_name.split(".")[-1]
326
326
  if extension not in self.supported_file_formats:
327
- logger.error(f'The file format {extension} is not supported!')
328
- raise ValueError(f'The file format {extension} is not supported!')
327
+ logger.error(f"The file format {extension} is not supported!")
328
+ raise ValueError(f"The file format {extension} is not supported!")
329
329
 
330
330
  table = FileTable(self, table_name=table_name)
331
331
  df = table.select(query)
332
332
 
333
- response = Response(
334
- RESPONSE_TYPE.TABLE,
335
- data_frame=df
336
- )
333
+ response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
337
334
  elif isinstance(query, Insert):
338
335
  table_name = query.table.parts[-1]
339
336
  table = FileTable(self, table_name=table_name)
@@ -364,7 +361,7 @@ class S3Handler(APIHandler):
364
361
  scan_buckets = [self.bucket]
365
362
  else:
366
363
  add_bucket_to_name = True
367
- scan_buckets = [b['Name'] for b in client.list_buckets()['Buckets']]
364
+ scan_buckets = [b["Name"] for b in client.list_buckets()["Buckets"]]
368
365
 
369
366
  objects = []
370
367
  for bucket in scan_buckets:
@@ -372,23 +369,38 @@ class S3Handler(APIHandler):
372
369
  continue
373
370
 
374
371
  resp = client.list_objects_v2(Bucket=bucket)
375
- if 'Contents' not in resp:
372
+ if "Contents" not in resp:
376
373
  continue
377
374
 
378
- for obj in resp['Contents']:
379
- if obj.get('StorageClass', 'STANDARD') != 'STANDARD':
375
+ for obj in resp["Contents"]:
376
+ if obj.get("StorageClass", "STANDARD") != "STANDARD":
380
377
  continue
381
378
 
382
- obj['Bucket'] = bucket
379
+ obj["Bucket"] = bucket
383
380
  if add_bucket_to_name:
384
381
  # bucket is part of the name
385
- obj['Key'] = f'{bucket}/{obj["Key"]}'
382
+ obj["Key"] = f"{bucket}/{obj['Key']}"
386
383
  objects.append(obj)
387
384
  if limit is not None and len(objects) >= limit:
388
385
  break
389
386
 
390
387
  return objects
391
388
 
389
+ def generate_sas_url(self, key: str, bucket: str) -> str:
390
+ """
391
+ Generates a pre-signed URL for accessing an object in the S3 bucket.
392
+
393
+ Args:
394
+ key (str): The key (path) of the object in the S3 bucket.
395
+ bucket (str): The name of the S3 bucket.
396
+
397
+ Returns:
398
+ str: The pre-signed URL for accessing the object.
399
+ """
400
+ client = self.connect()
401
+ url = client.generate_presigned_url("get_object", Params={"Bucket": bucket, "Key": key}, ExpiresIn=3600)
402
+ return url
403
+
392
404
  def get_tables(self) -> Response:
393
405
  """
394
406
  Retrieves a list of tables (objects) in the S3 bucket.
@@ -402,21 +414,13 @@ class S3Handler(APIHandler):
402
414
  # Get only the supported file formats.
403
415
  # Wrap the object names with backticks to prevent SQL syntax errors.
404
416
  supported_names = [
405
- f"`{obj['Key']}`"
406
- for obj in self.get_objects()
407
- if obj['Key'].split('.')[-1] in self.supported_file_formats
417
+ f"`{obj['Key']}`" for obj in self.get_objects() if obj["Key"].split(".")[-1] in self.supported_file_formats
408
418
  ]
409
419
 
410
420
  # virtual table with list of files
411
- supported_names.insert(0, 'files')
421
+ supported_names.insert(0, "files")
412
422
 
413
- response = Response(
414
- RESPONSE_TYPE.TABLE,
415
- data_frame=pd.DataFrame(
416
- supported_names,
417
- columns=['table_name']
418
- )
419
- )
423
+ response = Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(supported_names, columns=["table_name"]))
420
424
 
421
425
  return response
422
426
 
@@ -433,11 +437,7 @@ class S3Handler(APIHandler):
433
437
  Returns:
434
438
  Response: A response object containing the column details, formatted as per the `Response` class.
435
439
  """
436
- query = Select(
437
- targets=[Star()],
438
- from_table=Identifier(parts=[table_name]),
439
- limit=Constant(1)
440
- )
440
+ query = Select(targets=[Star()], from_table=Identifier(parts=[table_name]), limit=Constant(1))
441
441
 
442
442
  result = self.query(query)
443
443
 
@@ -445,10 +445,12 @@ class S3Handler(APIHandler):
445
445
  RESPONSE_TYPE.TABLE,
446
446
  data_frame=pd.DataFrame(
447
447
  {
448
- 'column_name': result.data_frame.columns,
449
- 'data_type': [data_type if data_type != 'object' else 'string' for data_type in result.data_frame.dtypes]
448
+ "column_name": result.data_frame.columns,
449
+ "data_type": [
450
+ data_type if data_type != "object" else "string" for data_type in result.data_frame.dtypes
451
+ ],
450
452
  }
451
- )
453
+ ),
452
454
  )
453
455
 
454
456
  return response
@@ -0,0 +1,208 @@
1
+ """
2
+ Constants for Salesforce handler.
3
+ """
4
+
5
+
6
def get_soql_instructions(integration_name: str) -> str:
    """Build the LLM prompt that teaches SOQL syntax and the required wrapper.

    Args:
        integration_name: Name of the MindsDB Salesforce integration; it is
            interpolated into the final wrapper query the model must emit.

    Returns:
        str: Prompt text covering SOQL rules, examples of correct/incorrect
        usage, and the execution-wrapper instructions.
    """
    return f"""This handler executes SOQL (Salesforce Object Query Language), NOT SQL! Follow these rules strictly:

**BASIC STRUCTURE:**
- NO "SELECT *" - must explicitly list all fields
SQL: SELECT * FROM Account;
SOQL: SELECT Id, Name, Industry FROM Account
- NO table aliases - use full table names only
SQL: SELECT a.Name FROM Account a;
SOQL: SELECT Name FROM Account
- NO column aliases - field names cannot be aliased
SQL: SELECT Name AS CompanyName FROM Account;
SOQL: SELECT Name FROM Account
- NO DISTINCT keyword - not supported in SOQL
SQL: SELECT DISTINCT Industry FROM Account;
SOQL: Not possible - use separate logic
- NO subqueries in FROM clause - only relationship-based subqueries allowed
SQL: SELECT * FROM (SELECT Name FROM Account) AS AccountNames;
SOQL: Not supported

**FIELD SELECTION:**
- Always include Id field when querying
CORRECT: SELECT Id, Name, Industry FROM Account
INCORRECT: SELECT Name, Industry FROM Account
- Field names are case-sensitive
CORRECT: SELECT CreatedDate FROM Account
INCORRECT: SELECT createddate FROM Account
- Use exact field names from the data catalog
CORRECT: SELECT CustomerPriority__c FROM Account
INCORRECT: SELECT customer_priority FROM Account

**FILTERING (WHERE clause):**
- Date/DateTime fields: Use unquoted literals in YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ format
CORRECT: WHERE CloseDate >= 2025-05-28
CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z
INCORRECT: WHERE CloseDate >= '2025-05-28'
INCORRECT: WHERE CreatedDate >= "2025-05-28"
- Special date literals: TODAY, YESTERDAY, LAST_WEEK, LAST_MONTH, LAST_QUARTER, LAST_YEAR, THIS_WEEK, THIS_MONTH, THIS_QUARTER, THIS_YEAR
CORRECT: WHERE CreatedDate = TODAY
CORRECT: WHERE LastModifiedDate >= LAST_MONTH
CORRECT: WHERE CloseDate = THIS_QUARTER
- LIKE operator: Only supports % wildcard, NO underscore (_) wildcard
CORRECT: WHERE Name LIKE '%Corp%'
CORRECT: WHERE Name LIKE 'Acme%'
INCORRECT: WHERE Name LIKE 'A_me%'
- BETWEEN operator: NOT supported, use >= AND <= instead
SQL: WHERE CreatedDate BETWEEN '2025-01-01' AND '2025-12-31'
SOQL: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31
- Boolean values: Use lowercase true/false, NOT TRUE/FALSE
CORRECT: WHERE Active__c = true
CORRECT: WHERE IsDeleted = false
INCORRECT: WHERE Active__c = TRUE
INCORRECT: WHERE IsDeleted = FALSE
- NULL values: Use lowercase null, NOT NULL
CORRECT: WHERE ParentId = null
CORRECT: WHERE Description != null
INCORRECT: WHERE ParentId IS NULL
INCORRECT: WHERE Description IS NOT NULL
- String values: Use single quotes for strings
CORRECT: WHERE Industry = 'Technology'
CORRECT: WHERE Name = 'Acme Corp'
INCORRECT: WHERE Industry = "Technology"
- Multi-select picklist fields: Use INCLUDES('value1;value2') or EXCLUDES('value1;value2')
CORRECT: WHERE Services__c INCLUDES ('Consulting;Support')
CORRECT: WHERE Services__c EXCLUDES ('Training')
INCORRECT: WHERE Services__c = 'Consulting'

**JOINS:**
- NO explicit JOIN syntax supported
SQL: SELECT a.Name, c.FirstName FROM Account a JOIN Contact c ON a.Id = c.AccountId
SOQL: Not supported - use relationship traversal (not applicable in this use case)

**AGGREGATES:**
- NO COUNT(*) - use COUNT(Id) instead
SQL: SELECT COUNT(*) FROM Account
SOQL: SELECT COUNT(Id) FROM Account
- Cannot mix aggregate functions with non-aggregate fields unless using GROUP BY
CORRECT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
CORRECT: SELECT COUNT(Id) FROM Account
INCORRECT: SELECT Industry, Name, COUNT(Id) FROM Account
- NO GROUP_CONCAT or string aggregation functions
SQL: SELECT GROUP_CONCAT(Name) FROM Account
SOQL: Not supported
- NO HAVING clause
SQL: SELECT Industry, COUNT(*) FROM Account GROUP BY Industry HAVING COUNT(*) > 5
SOQL: Not supported - filter with separate logic
- GROUP BY has limited field type support
CORRECT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
INCORRECT: SELECT Description, COUNT(Id) FROM Account GROUP BY Description (textarea fields not supported)

**FUNCTIONS:**
- Date functions: CALENDAR_MONTH(), CALENDAR_YEAR(), CALENDAR_QUARTER(), DAY_IN_MONTH(), DAY_IN_WEEK(), DAY_IN_YEAR(), HOUR_IN_DAY(), WEEK_IN_MONTH(), WEEK_IN_YEAR()
CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025
CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_MONTH(CreatedDate) = 5
CORRECT: SELECT Id, Name FROM Account WHERE DAY_IN_WEEK(CreatedDate) = 2
- NO math functions: ROUND, FLOOR, CEILING, ABS, etc.
SQL: SELECT ROUND(AnnualRevenue, 2) FROM Account
SOQL: Not supported
- NO conditional functions: CASE WHEN, COALESCE, NULLIF, etc.
SQL: SELECT CASE WHEN Industry = 'Technology' THEN 'Tech' ELSE 'Other' END FROM Account
SOQL: Not supported
- NO string functions except INCLUDES/EXCLUDES for multi-select picklists
SQL: SELECT UPPER(Name) FROM Account
SOQL: Not supported

**OPERATORS:**
- Supported: =, !=, <, >, <=, >=, LIKE, IN, NOT IN, INCLUDES, EXCLUDES
CORRECT: WHERE Industry = 'Technology'
CORRECT: WHERE AnnualRevenue >= 1000000
CORRECT: WHERE Industry IN ('Technology', 'Finance')
CORRECT: WHERE Industry NOT IN ('Government', 'Non-Profit')
CORRECT: WHERE Services__c INCLUDES ('Consulting')
- NOT supported: REGEXP, BETWEEN, EXISTS, NOT EXISTS
SQL: WHERE Name REGEXP '^[A-Z]'
SOQL: Not supported

**SORTING & LIMITING:**
- ORDER BY: Fully supported
CORRECT: SELECT Id, Name FROM Account ORDER BY Name ASC
CORRECT: SELECT Id, Name FROM Account ORDER BY CreatedDate DESC, Name ASC
CORRECT: SELECT Id, Name FROM Account ORDER BY Name NULLS LAST
- LIMIT: Maximum 2000 records, use smaller limits for better performance
CORRECT: SELECT Id, Name FROM Account LIMIT 100
CORRECT: SELECT Id, Name FROM Account LIMIT 2000
INCORRECT: SELECT Id, Name FROM Account LIMIT 5000
- NO OFFSET: Not supported for pagination
SQL: SELECT Id, Name FROM Account LIMIT 10 OFFSET 20
SOQL: Not supported

**DATA TYPES:**
- picklist: Single-select dropdown, use = operator with string values
CORRECT: WHERE Industry = 'Technology'
CORRECT: WHERE Rating = 'Hot'
- reference: Foreign key field, typically ends with Id
CORRECT: WHERE OwnerId = '00530000003OOwn'
CORRECT: WHERE AccountId = '0013000000UzXyz'
- boolean: Use lowercase true/false
CORRECT: WHERE IsDeleted = false
CORRECT: WHERE Active__c = true
- currency: Numeric field for money values
CORRECT: WHERE AnnualRevenue > 1000000
CORRECT: WHERE AnnualRevenue >= 500000.50
- date: Date only, use YYYY-MM-DD format
CORRECT: WHERE LastActivityDate = 2025-05-28
CORRECT: WHERE SLAExpirationDate__c >= 2025-01-01
- datetime: Date and time, use YYYY-MM-DDThh:mm:ssZ format
CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z
CORRECT: WHERE LastModifiedDate = 2025-05-28T00:00:00Z
- double/int: Numeric fields
CORRECT: WHERE NumberOfEmployees > 100
CORRECT: WHERE NumberofLocations__c >= 5.5
- string/textarea: Text fields, use single quotes
CORRECT: WHERE Name = 'Acme Corporation'
CORRECT: WHERE Description = 'Leading tech company'
- phone/url/email: Specialized string fields, treat as strings
CORRECT: WHERE Phone = '555-1234'
CORRECT: WHERE Website = 'https://example.com'

**COMMON MISTAKES TO AVOID:**
- Using SELECT * (not allowed)
WRONG: SELECT * FROM Account
RIGHT: SELECT Id, Name, Industry FROM Account
- Quoting date literals (dates must be unquoted)
WRONG: WHERE CreatedDate >= '2025-01-01'
RIGHT: WHERE CreatedDate >= 2025-01-01
- Using SQL JOIN syntax (not supported)
WRONG: SELECT Account.Name FROM Account JOIN Contact ON Account.Id = Contact.AccountId
RIGHT: Use relationship traversal (not applicable in this use case)
- Using BETWEEN operator (not supported)
WRONG: WHERE CreatedDate BETWEEN 2025-01-01 AND 2025-12-31
RIGHT: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31
- Using uppercase TRUE/FALSE/NULL (must be lowercase)
WRONG: WHERE Active__c = TRUE
RIGHT: WHERE Active__c = true
- Using underscore _ in LIKE patterns (only % supported)
WRONG: WHERE Name LIKE 'A_me%'
RIGHT: WHERE Name LIKE 'A%me%'
- Mixing aggregate and non-aggregate fields without GROUP BY
WRONG: SELECT Name, COUNT(Id) FROM Account
RIGHT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry

**EXAMPLE QUERIES:**
- Basic selection: SELECT Id, Name, Industry FROM Account WHERE Industry = 'Technology'
- Date filtering: SELECT Id, Name FROM Account WHERE CreatedDate >= 2025-01-01
- Multiple conditions: SELECT Id, Name FROM Account WHERE Name LIKE '%Corp%' AND Industry IN ('Technology', 'Finance')
- Aggregation: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
- Boolean and numeric: SELECT Id, Name FROM Account WHERE Active__c = true AND NumberOfEmployees > 100
- Date functions: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025
- Null checks: SELECT Id, Name FROM Account WHERE ParentId = null
- Multi-select picklist: SELECT Id, Name FROM Account WHERE Services__c INCLUDES ('Consulting;Support')
- Sorting and limiting: SELECT Id, Name FROM Account ORDER BY Name ASC LIMIT 50

***EXECUTION INSTRUCTIONS. IMPORTANT!***
After generating the core SOQL (and nothing else), always make sure you wrap it exactly as:

SELECT *
FROM {integration_name}(
/* your generated SOQL goes here, without a trailing semicolon */
)

Return only that wrapper call.

"""