MindsDB 25.7.2.0__py3-none-any.whl → 25.7.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +1 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +213 -137
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/http/initialize.py +16 -43
- mindsdb/api/http/namespaces/agents.py +24 -21
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -76
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +16 -3
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
- mindsdb/integrations/handlers/salesforce_handler/constants.py +208 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +142 -81
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -4
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/interfaces/agents/agents_controller.py +169 -110
- mindsdb/interfaces/agents/langchain_agent.py +10 -3
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -8
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +63 -16
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +33 -5
- mindsdb/interfaces/knowledge_base/evaluate.py +53 -9
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
- mindsdb/interfaces/query_context/context_controller.py +100 -133
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +3 -3
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/METADATA +262 -263
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/RECORD +69 -68
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,7 @@ from duckdb import HTTPException
|
|
|
7
7
|
from mindsdb_sql_parser import parse_sql
|
|
8
8
|
import pandas as pd
|
|
9
9
|
from typing import Text, Dict, Optional
|
|
10
|
+
from botocore.client import Config
|
|
10
11
|
from botocore.exceptions import ClientError
|
|
11
12
|
|
|
12
13
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
@@ -16,7 +17,7 @@ from mindsdb.utilities import log
|
|
|
16
17
|
from mindsdb.integrations.libs.response import (
|
|
17
18
|
HandlerStatusResponse as StatusResponse,
|
|
18
19
|
HandlerResponse as Response,
|
|
19
|
-
RESPONSE_TYPE
|
|
20
|
+
RESPONSE_TYPE,
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
from mindsdb.integrations.libs.api_handler import APIResource, APIHandler
|
|
@@ -26,16 +27,12 @@ logger = log.getLogger(__name__)
|
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
class ListFilesTable(APIResource):
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
conditions: List[FilterCondition] = None,
|
|
33
|
-
limit: int = None,
|
|
34
|
-
*args, **kwargs) -> pd.DataFrame:
|
|
35
|
-
|
|
30
|
+
def list(
|
|
31
|
+
self, targets: List[str] = None, conditions: List[FilterCondition] = None, limit: int = None, *args, **kwargs
|
|
32
|
+
) -> pd.DataFrame:
|
|
36
33
|
buckets = None
|
|
37
34
|
for condition in conditions:
|
|
38
|
-
if condition.column ==
|
|
35
|
+
if condition.column == "bucket":
|
|
39
36
|
if condition.op == FilterOperator.IN:
|
|
40
37
|
buckets = condition.value
|
|
41
38
|
elif condition.op == FilterOperator.EQUAL:
|
|
@@ -44,25 +41,27 @@ class ListFilesTable(APIResource):
|
|
|
44
41
|
|
|
45
42
|
data = []
|
|
46
43
|
for obj in self.handler.get_objects(limit=limit, buckets=buckets):
|
|
47
|
-
path = obj[
|
|
48
|
-
path = path.replace(
|
|
44
|
+
path = obj["Key"]
|
|
45
|
+
path = path.replace("`", "")
|
|
49
46
|
item = {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
47
|
+
"path": path,
|
|
48
|
+
"bucket": obj["Bucket"],
|
|
49
|
+
"name": path[path.rfind("/") + 1 :],
|
|
50
|
+
"extension": path[path.rfind(".") + 1 :],
|
|
54
51
|
}
|
|
55
52
|
|
|
53
|
+
if targets and "public_url" in targets:
|
|
54
|
+
item["public_url"] = self.handler.generate_sas_url(path, obj["Bucket"])
|
|
55
|
+
|
|
56
56
|
data.append(item)
|
|
57
57
|
|
|
58
58
|
return pd.DataFrame(data=data, columns=self.get_columns())
|
|
59
59
|
|
|
60
60
|
def get_columns(self) -> List[str]:
|
|
61
|
-
return ["path", "name", "extension", "bucket", "content"]
|
|
61
|
+
return ["path", "name", "extension", "bucket", "content", "public_url"]
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
class FileTable(APIResource):
|
|
65
|
-
|
|
66
65
|
def list(self, targets: List[str] = None, table_name=None, *args, **kwargs) -> pd.DataFrame:
|
|
67
66
|
return self.handler.read_as_table(table_name)
|
|
68
67
|
|
|
@@ -76,9 +75,9 @@ class S3Handler(APIHandler):
|
|
|
76
75
|
This handler handles connection and execution of the SQL statements on AWS S3.
|
|
77
76
|
"""
|
|
78
77
|
|
|
79
|
-
name =
|
|
78
|
+
name = "s3"
|
|
80
79
|
# TODO: Can other file formats be supported?
|
|
81
|
-
supported_file_formats = [
|
|
80
|
+
supported_file_formats = ["csv", "tsv", "json", "parquet"]
|
|
82
81
|
|
|
83
82
|
def __init__(self, name: Text, connection_data: Optional[Dict], **kwargs):
|
|
84
83
|
"""
|
|
@@ -96,7 +95,7 @@ class S3Handler(APIHandler):
|
|
|
96
95
|
self.connection = None
|
|
97
96
|
self.is_connected = False
|
|
98
97
|
self.thread_safe = True
|
|
99
|
-
self.bucket = self.connection_data.get(
|
|
98
|
+
self.bucket = self.connection_data.get("bucket")
|
|
100
99
|
self._regions = {}
|
|
101
100
|
|
|
102
101
|
self._files_table = ListFilesTable(self)
|
|
@@ -119,8 +118,8 @@ class S3Handler(APIHandler):
|
|
|
119
118
|
return self.connection
|
|
120
119
|
|
|
121
120
|
# Validate mandatory parameters.
|
|
122
|
-
if not all(key in self.connection_data for key in [
|
|
123
|
-
raise ValueError(
|
|
121
|
+
if not all(key in self.connection_data for key in ["aws_access_key_id", "aws_secret_access_key"]):
|
|
122
|
+
raise ValueError("Required parameters (aws_access_key_id, aws_secret_access_key) must be provided.")
|
|
124
123
|
|
|
125
124
|
# Connect to S3 and configure mandatory credentials.
|
|
126
125
|
self.connection = self._connect_boto3()
|
|
@@ -152,13 +151,13 @@ class S3Handler(APIHandler):
|
|
|
152
151
|
duckdb_conn.execute(f"SET s3_secret_access_key='{self.connection_data['aws_secret_access_key']}'")
|
|
153
152
|
|
|
154
153
|
# Configure optional parameters.
|
|
155
|
-
if
|
|
154
|
+
if "aws_session_token" in self.connection_data:
|
|
156
155
|
duckdb_conn.execute(f"SET s3_session_token='{self.connection_data['aws_session_token']}'")
|
|
157
156
|
|
|
158
157
|
# detect region for bucket
|
|
159
158
|
if bucket not in self._regions:
|
|
160
159
|
client = self.connect()
|
|
161
|
-
self._regions[bucket] = client.get_bucket_location(Bucket=bucket)[
|
|
160
|
+
self._regions[bucket] = client.get_bucket_location(Bucket=bucket)["LocationConstraint"]
|
|
162
161
|
|
|
163
162
|
region = self._regions[bucket]
|
|
164
163
|
duckdb_conn.execute(f"SET s3_region='{region}'")
|
|
@@ -177,15 +176,17 @@ class S3Handler(APIHandler):
|
|
|
177
176
|
"""
|
|
178
177
|
# Configure mandatory credentials.
|
|
179
178
|
config = {
|
|
180
|
-
|
|
181
|
-
|
|
179
|
+
"aws_access_key_id": self.connection_data["aws_access_key_id"],
|
|
180
|
+
"aws_secret_access_key": self.connection_data["aws_secret_access_key"],
|
|
182
181
|
}
|
|
183
182
|
|
|
184
183
|
# Configure optional parameters.
|
|
185
|
-
|
|
186
|
-
|
|
184
|
+
optional_parameters = ["region_name", "aws_session_token"]
|
|
185
|
+
for parameter in optional_parameters:
|
|
186
|
+
if parameter in self.connection_data:
|
|
187
|
+
config[parameter] = self.connection_data[parameter]
|
|
187
188
|
|
|
188
|
-
client = boto3.client(
|
|
189
|
+
client = boto3.client("s3", **config, config=Config(signature_version="s3v4"))
|
|
189
190
|
|
|
190
191
|
# check connection
|
|
191
192
|
if self.bucket is not None:
|
|
@@ -219,7 +220,7 @@ class S3Handler(APIHandler):
|
|
|
219
220
|
self._connect_boto3()
|
|
220
221
|
response.success = True
|
|
221
222
|
except (ClientError, ValueError) as e:
|
|
222
|
-
logger.error(f
|
|
223
|
+
logger.error(f"Error connecting to S3 with the given credentials, {e}!")
|
|
223
224
|
response.error_message = str(e)
|
|
224
225
|
|
|
225
226
|
if response.success and need_to_close:
|
|
@@ -235,8 +236,8 @@ class S3Handler(APIHandler):
|
|
|
235
236
|
return self.bucket, key
|
|
236
237
|
|
|
237
238
|
# get bucket from first part of the key
|
|
238
|
-
ar = key.split(
|
|
239
|
-
return ar[0],
|
|
239
|
+
ar = key.split("/")
|
|
240
|
+
return ar[0], "/".join(ar[1:])
|
|
240
241
|
|
|
241
242
|
def read_as_table(self, key) -> pd.DataFrame:
|
|
242
243
|
"""
|
|
@@ -245,7 +246,6 @@ class S3Handler(APIHandler):
|
|
|
245
246
|
bucket, key = self._get_bucket(key)
|
|
246
247
|
|
|
247
248
|
with self._connect_duckdb(bucket) as connection:
|
|
248
|
-
|
|
249
249
|
cursor = connection.execute(f"SELECT * FROM 's3://{bucket}/{key}'")
|
|
250
250
|
|
|
251
251
|
return cursor.fetchdf()
|
|
@@ -259,7 +259,7 @@ class S3Handler(APIHandler):
|
|
|
259
259
|
client = self.connect()
|
|
260
260
|
|
|
261
261
|
obj = client.get_object(Bucket=bucket, Key=key)
|
|
262
|
-
content = obj[
|
|
262
|
+
content = obj["Body"].read()
|
|
263
263
|
return content
|
|
264
264
|
|
|
265
265
|
def add_data_to_table(self, key, df) -> None:
|
|
@@ -277,7 +277,7 @@ class S3Handler(APIHandler):
|
|
|
277
277
|
client = self.connect()
|
|
278
278
|
client.head_object(Bucket=bucket, Key=key)
|
|
279
279
|
except ClientError as e:
|
|
280
|
-
logger.error(f
|
|
280
|
+
logger.error(f"Error querying the file {key} in the bucket {bucket}, {e}!")
|
|
281
281
|
raise e
|
|
282
282
|
|
|
283
283
|
with self._connect_duckdb(bucket) as connection:
|
|
@@ -309,31 +309,28 @@ class S3Handler(APIHandler):
|
|
|
309
309
|
if isinstance(query, Select):
|
|
310
310
|
table_name = query.from_table.parts[-1]
|
|
311
311
|
|
|
312
|
-
if table_name ==
|
|
312
|
+
if table_name == "files":
|
|
313
313
|
table = self._files_table
|
|
314
314
|
df = table.select(query)
|
|
315
315
|
|
|
316
316
|
# add content
|
|
317
317
|
has_content = False
|
|
318
318
|
for target in query.targets:
|
|
319
|
-
if isinstance(target, Identifier) and target.parts[-1].lower() ==
|
|
319
|
+
if isinstance(target, Identifier) and target.parts[-1].lower() == "content":
|
|
320
320
|
has_content = True
|
|
321
321
|
break
|
|
322
322
|
if has_content:
|
|
323
|
-
df[
|
|
323
|
+
df["content"] = df["path"].apply(self._read_as_content)
|
|
324
324
|
else:
|
|
325
|
-
extension = table_name.split(
|
|
325
|
+
extension = table_name.split(".")[-1]
|
|
326
326
|
if extension not in self.supported_file_formats:
|
|
327
|
-
logger.error(f
|
|
328
|
-
raise ValueError(f
|
|
327
|
+
logger.error(f"The file format {extension} is not supported!")
|
|
328
|
+
raise ValueError(f"The file format {extension} is not supported!")
|
|
329
329
|
|
|
330
330
|
table = FileTable(self, table_name=table_name)
|
|
331
331
|
df = table.select(query)
|
|
332
332
|
|
|
333
|
-
response = Response(
|
|
334
|
-
RESPONSE_TYPE.TABLE,
|
|
335
|
-
data_frame=df
|
|
336
|
-
)
|
|
333
|
+
response = Response(RESPONSE_TYPE.TABLE, data_frame=df)
|
|
337
334
|
elif isinstance(query, Insert):
|
|
338
335
|
table_name = query.table.parts[-1]
|
|
339
336
|
table = FileTable(self, table_name=table_name)
|
|
@@ -364,7 +361,7 @@ class S3Handler(APIHandler):
|
|
|
364
361
|
scan_buckets = [self.bucket]
|
|
365
362
|
else:
|
|
366
363
|
add_bucket_to_name = True
|
|
367
|
-
scan_buckets = [b[
|
|
364
|
+
scan_buckets = [b["Name"] for b in client.list_buckets()["Buckets"]]
|
|
368
365
|
|
|
369
366
|
objects = []
|
|
370
367
|
for bucket in scan_buckets:
|
|
@@ -372,23 +369,38 @@ class S3Handler(APIHandler):
|
|
|
372
369
|
continue
|
|
373
370
|
|
|
374
371
|
resp = client.list_objects_v2(Bucket=bucket)
|
|
375
|
-
if
|
|
372
|
+
if "Contents" not in resp:
|
|
376
373
|
continue
|
|
377
374
|
|
|
378
|
-
for obj in resp[
|
|
379
|
-
if obj.get(
|
|
375
|
+
for obj in resp["Contents"]:
|
|
376
|
+
if obj.get("StorageClass", "STANDARD") != "STANDARD":
|
|
380
377
|
continue
|
|
381
378
|
|
|
382
|
-
obj[
|
|
379
|
+
obj["Bucket"] = bucket
|
|
383
380
|
if add_bucket_to_name:
|
|
384
381
|
# bucket is part of the name
|
|
385
|
-
obj[
|
|
382
|
+
obj["Key"] = f"{bucket}/{obj['Key']}"
|
|
386
383
|
objects.append(obj)
|
|
387
384
|
if limit is not None and len(objects) >= limit:
|
|
388
385
|
break
|
|
389
386
|
|
|
390
387
|
return objects
|
|
391
388
|
|
|
389
|
+
def generate_sas_url(self, key: str, bucket: str) -> str:
|
|
390
|
+
"""
|
|
391
|
+
Generates a pre-signed URL for accessing an object in the S3 bucket.
|
|
392
|
+
|
|
393
|
+
Args:
|
|
394
|
+
key (str): The key (path) of the object in the S3 bucket.
|
|
395
|
+
bucket (str): The name of the S3 bucket.
|
|
396
|
+
|
|
397
|
+
Returns:
|
|
398
|
+
str: The pre-signed URL for accessing the object.
|
|
399
|
+
"""
|
|
400
|
+
client = self.connect()
|
|
401
|
+
url = client.generate_presigned_url("get_object", Params={"Bucket": bucket, "Key": key}, ExpiresIn=3600)
|
|
402
|
+
return url
|
|
403
|
+
|
|
392
404
|
def get_tables(self) -> Response:
|
|
393
405
|
"""
|
|
394
406
|
Retrieves a list of tables (objects) in the S3 bucket.
|
|
@@ -402,21 +414,13 @@ class S3Handler(APIHandler):
|
|
|
402
414
|
# Get only the supported file formats.
|
|
403
415
|
# Wrap the object names with backticks to prevent SQL syntax errors.
|
|
404
416
|
supported_names = [
|
|
405
|
-
f"`{obj['Key']}`"
|
|
406
|
-
for obj in self.get_objects()
|
|
407
|
-
if obj['Key'].split('.')[-1] in self.supported_file_formats
|
|
417
|
+
f"`{obj['Key']}`" for obj in self.get_objects() if obj["Key"].split(".")[-1] in self.supported_file_formats
|
|
408
418
|
]
|
|
409
419
|
|
|
410
420
|
# virtual table with list of files
|
|
411
|
-
supported_names.insert(0,
|
|
421
|
+
supported_names.insert(0, "files")
|
|
412
422
|
|
|
413
|
-
response = Response(
|
|
414
|
-
RESPONSE_TYPE.TABLE,
|
|
415
|
-
data_frame=pd.DataFrame(
|
|
416
|
-
supported_names,
|
|
417
|
-
columns=['table_name']
|
|
418
|
-
)
|
|
419
|
-
)
|
|
423
|
+
response = Response(RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame(supported_names, columns=["table_name"]))
|
|
420
424
|
|
|
421
425
|
return response
|
|
422
426
|
|
|
@@ -433,11 +437,7 @@ class S3Handler(APIHandler):
|
|
|
433
437
|
Returns:
|
|
434
438
|
Response: A response object containing the column details, formatted as per the `Response` class.
|
|
435
439
|
"""
|
|
436
|
-
query = Select(
|
|
437
|
-
targets=[Star()],
|
|
438
|
-
from_table=Identifier(parts=[table_name]),
|
|
439
|
-
limit=Constant(1)
|
|
440
|
-
)
|
|
440
|
+
query = Select(targets=[Star()], from_table=Identifier(parts=[table_name]), limit=Constant(1))
|
|
441
441
|
|
|
442
442
|
result = self.query(query)
|
|
443
443
|
|
|
@@ -445,10 +445,12 @@ class S3Handler(APIHandler):
|
|
|
445
445
|
RESPONSE_TYPE.TABLE,
|
|
446
446
|
data_frame=pd.DataFrame(
|
|
447
447
|
{
|
|
448
|
-
|
|
449
|
-
|
|
448
|
+
"column_name": result.data_frame.columns,
|
|
449
|
+
"data_type": [
|
|
450
|
+
data_type if data_type != "object" else "string" for data_type in result.data_frame.dtypes
|
|
451
|
+
],
|
|
450
452
|
}
|
|
451
|
-
)
|
|
453
|
+
),
|
|
452
454
|
)
|
|
453
455
|
|
|
454
456
|
return response
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Constants for Salesforce handler.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_soql_instructions(integration_name):
|
|
7
|
+
return f"""This handler executes SOQL (Salesforce Object Query Language), NOT SQL! Follow these rules strictly:
|
|
8
|
+
|
|
9
|
+
**BASIC STRUCTURE:**
|
|
10
|
+
- NO "SELECT *" - must explicitly list all fields
|
|
11
|
+
SQL: SELECT * FROM Account;
|
|
12
|
+
SOQL: SELECT Id, Name, Industry FROM Account
|
|
13
|
+
- NO table aliases - use full table names only
|
|
14
|
+
SQL: SELECT a.Name FROM Account a;
|
|
15
|
+
SOQL: SELECT Name FROM Account
|
|
16
|
+
- NO column aliases - field names cannot be aliased
|
|
17
|
+
SQL: SELECT Name AS CompanyName FROM Account;
|
|
18
|
+
SOQL: SELECT Name FROM Account
|
|
19
|
+
- NO DISTINCT keyword - not supported in SOQL
|
|
20
|
+
SQL: SELECT DISTINCT Industry FROM Account;
|
|
21
|
+
SOQL: Not possible - use separate logic
|
|
22
|
+
- NO subqueries in FROM clause - only relationship-based subqueries allowed
|
|
23
|
+
SQL: SELECT * FROM (SELECT Name FROM Account) AS AccountNames;
|
|
24
|
+
SOQL: Not supported
|
|
25
|
+
|
|
26
|
+
**FIELD SELECTION:**
|
|
27
|
+
- Always include Id field when querying
|
|
28
|
+
CORRECT: SELECT Id, Name, Industry FROM Account
|
|
29
|
+
INCORRECT: SELECT Name, Industry FROM Account
|
|
30
|
+
- Field names are case-sensitive
|
|
31
|
+
CORRECT: SELECT CreatedDate FROM Account
|
|
32
|
+
INCORRECT: SELECT createddate FROM Account
|
|
33
|
+
- Use exact field names from the data catalog
|
|
34
|
+
CORRECT: SELECT CustomerPriority__c FROM Account
|
|
35
|
+
INCORRECT: SELECT customer_priority FROM Account
|
|
36
|
+
|
|
37
|
+
**FILTERING (WHERE clause):**
|
|
38
|
+
- Date/DateTime fields: Use unquoted literals in YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ format
|
|
39
|
+
CORRECT: WHERE CloseDate >= 2025-05-28
|
|
40
|
+
CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z
|
|
41
|
+
INCORRECT: WHERE CloseDate >= '2025-05-28'
|
|
42
|
+
INCORRECT: WHERE CreatedDate >= "2025-05-28"
|
|
43
|
+
- Special date literals: TODAY, YESTERDAY, LAST_WEEK, LAST_MONTH, LAST_QUARTER, LAST_YEAR, THIS_WEEK, THIS_MONTH, THIS_QUARTER, THIS_YEAR
|
|
44
|
+
CORRECT: WHERE CreatedDate = TODAY
|
|
45
|
+
CORRECT: WHERE LastModifiedDate >= LAST_MONTH
|
|
46
|
+
CORRECT: WHERE CloseDate = THIS_QUARTER
|
|
47
|
+
- LIKE operator: Only supports % wildcard, NO underscore (_) wildcard
|
|
48
|
+
CORRECT: WHERE Name LIKE '%Corp%'
|
|
49
|
+
CORRECT: WHERE Name LIKE 'Acme%'
|
|
50
|
+
INCORRECT: WHERE Name LIKE 'A_me%'
|
|
51
|
+
- BETWEEN operator: NOT supported, use >= AND <= instead
|
|
52
|
+
SQL: WHERE CreatedDate BETWEEN '2025-01-01' AND '2025-12-31'
|
|
53
|
+
SOQL: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31
|
|
54
|
+
- Boolean values: Use lowercase true/false, NOT TRUE/FALSE
|
|
55
|
+
CORRECT: WHERE Active__c = true
|
|
56
|
+
CORRECT: WHERE IsDeleted = false
|
|
57
|
+
INCORRECT: WHERE Active__c = TRUE
|
|
58
|
+
INCORRECT: WHERE IsDeleted = FALSE
|
|
59
|
+
- NULL values: Use lowercase null, NOT NULL
|
|
60
|
+
CORRECT: WHERE ParentId = null
|
|
61
|
+
CORRECT: WHERE Description != null
|
|
62
|
+
INCORRECT: WHERE ParentId IS NULL
|
|
63
|
+
INCORRECT: WHERE Description IS NOT NULL
|
|
64
|
+
- String values: Use single quotes for strings
|
|
65
|
+
CORRECT: WHERE Industry = 'Technology'
|
|
66
|
+
CORRECT: WHERE Name = 'Acme Corp'
|
|
67
|
+
INCORRECT: WHERE Industry = "Technology"
|
|
68
|
+
- Multi-select picklist fields: Use INCLUDES('value1;value2') or EXCLUDES('value1;value2')
|
|
69
|
+
CORRECT: WHERE Services__c INCLUDES ('Consulting;Support')
|
|
70
|
+
CORRECT: WHERE Services__c EXCLUDES ('Training')
|
|
71
|
+
INCORRECT: WHERE Services__c = 'Consulting'
|
|
72
|
+
|
|
73
|
+
**JOINS:**
|
|
74
|
+
- NO explicit JOIN syntax supported
|
|
75
|
+
SQL: SELECT a.Name, c.FirstName FROM Account a JOIN Contact c ON a.Id = c.AccountId
|
|
76
|
+
SOQL: Not supported - use relationship traversal (not applicable in this use case)
|
|
77
|
+
|
|
78
|
+
**AGGREGATES:**
|
|
79
|
+
- NO COUNT(*) - use COUNT(Id) instead
|
|
80
|
+
SQL: SELECT COUNT(*) FROM Account
|
|
81
|
+
SOQL: SELECT COUNT(Id) FROM Account
|
|
82
|
+
- Cannot mix aggregate functions with non-aggregate fields unless using GROUP BY
|
|
83
|
+
CORRECT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
|
|
84
|
+
CORRECT: SELECT COUNT(Id) FROM Account
|
|
85
|
+
INCORRECT: SELECT Industry, Name, COUNT(Id) FROM Account
|
|
86
|
+
- NO GROUP_CONCAT or string aggregation functions
|
|
87
|
+
SQL: SELECT GROUP_CONCAT(Name) FROM Account
|
|
88
|
+
SOQL: Not supported
|
|
89
|
+
- NO HAVING clause
|
|
90
|
+
SQL: SELECT Industry, COUNT(*) FROM Account GROUP BY Industry HAVING COUNT(*) > 5
|
|
91
|
+
SOQL: Not supported - filter with separate logic
|
|
92
|
+
- GROUP BY has limited field type support
|
|
93
|
+
CORRECT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
|
|
94
|
+
INCORRECT: SELECT Description, COUNT(Id) FROM Account GROUP BY Description (textarea fields not supported)
|
|
95
|
+
|
|
96
|
+
**FUNCTIONS:**
|
|
97
|
+
- Date functions: CALENDAR_MONTH(), CALENDAR_YEAR(), CALENDAR_QUARTER(), DAY_IN_MONTH(), DAY_IN_WEEK(), DAY_IN_YEAR(), HOUR_IN_DAY(), WEEK_IN_MONTH(), WEEK_IN_YEAR()
|
|
98
|
+
CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025
|
|
99
|
+
CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_MONTH(CreatedDate) = 5
|
|
100
|
+
CORRECT: SELECT Id, Name FROM Account WHERE DAY_IN_WEEK(CreatedDate) = 2
|
|
101
|
+
- NO math functions: ROUND, FLOOR, CEILING, ABS, etc.
|
|
102
|
+
SQL: SELECT ROUND(AnnualRevenue, 2) FROM Account
|
|
103
|
+
SOQL: Not supported
|
|
104
|
+
- NO conditional functions: CASE WHEN, COALESCE, NULLIF, etc.
|
|
105
|
+
SQL: SELECT CASE WHEN Industry = 'Technology' THEN 'Tech' ELSE 'Other' END FROM Account
|
|
106
|
+
SOQL: Not supported
|
|
107
|
+
- NO string functions except INCLUDES/EXCLUDES for multi-select picklists
|
|
108
|
+
SQL: SELECT UPPER(Name) FROM Account
|
|
109
|
+
SOQL: Not supported
|
|
110
|
+
|
|
111
|
+
**OPERATORS:**
|
|
112
|
+
- Supported: =, !=, <, >, <=, >=, LIKE, IN, NOT IN, INCLUDES, EXCLUDES
|
|
113
|
+
CORRECT: WHERE Industry = 'Technology'
|
|
114
|
+
CORRECT: WHERE AnnualRevenue >= 1000000
|
|
115
|
+
CORRECT: WHERE Industry IN ('Technology', 'Finance')
|
|
116
|
+
CORRECT: WHERE Industry NOT IN ('Government', 'Non-Profit')
|
|
117
|
+
CORRECT: WHERE Services__c INCLUDES ('Consulting')
|
|
118
|
+
- NOT supported: REGEXP, BETWEEN, EXISTS, NOT EXISTS
|
|
119
|
+
SQL: WHERE Name REGEXP '^[A-Z]'
|
|
120
|
+
SOQL: Not supported
|
|
121
|
+
|
|
122
|
+
**SORTING & LIMITING:**
|
|
123
|
+
- ORDER BY: Fully supported
|
|
124
|
+
CORRECT: SELECT Id, Name FROM Account ORDER BY Name ASC
|
|
125
|
+
CORRECT: SELECT Id, Name FROM Account ORDER BY CreatedDate DESC, Name ASC
|
|
126
|
+
CORRECT: SELECT Id, Name FROM Account ORDER BY Name NULLS LAST
|
|
127
|
+
- LIMIT: Maximum 2000 records, use smaller limits for better performance
|
|
128
|
+
CORRECT: SELECT Id, Name FROM Account LIMIT 100
|
|
129
|
+
CORRECT: SELECT Id, Name FROM Account LIMIT 2000
|
|
130
|
+
INCORRECT: SELECT Id, Name FROM Account LIMIT 5000
|
|
131
|
+
- NO OFFSET: Not supported for pagination
|
|
132
|
+
SQL: SELECT Id, Name FROM Account LIMIT 10 OFFSET 20
|
|
133
|
+
SOQL: Not supported
|
|
134
|
+
|
|
135
|
+
**DATA TYPES:**
|
|
136
|
+
- picklist: Single-select dropdown, use = operator with string values
|
|
137
|
+
CORRECT: WHERE Industry = 'Technology'
|
|
138
|
+
CORRECT: WHERE Rating = 'Hot'
|
|
139
|
+
- reference: Foreign key field, typically ends with Id
|
|
140
|
+
CORRECT: WHERE OwnerId = '00530000003OOwn'
|
|
141
|
+
CORRECT: WHERE AccountId = '0013000000UzXyz'
|
|
142
|
+
- boolean: Use lowercase true/false
|
|
143
|
+
CORRECT: WHERE IsDeleted = false
|
|
144
|
+
CORRECT: WHERE Active__c = true
|
|
145
|
+
- currency: Numeric field for money values
|
|
146
|
+
CORRECT: WHERE AnnualRevenue > 1000000
|
|
147
|
+
CORRECT: WHERE AnnualRevenue >= 500000.50
|
|
148
|
+
- date: Date only, use YYYY-MM-DD format
|
|
149
|
+
CORRECT: WHERE LastActivityDate = 2025-05-28
|
|
150
|
+
CORRECT: WHERE SLAExpirationDate__c >= 2025-01-01
|
|
151
|
+
- datetime: Date and time, use YYYY-MM-DDThh:mm:ssZ format
|
|
152
|
+
CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z
|
|
153
|
+
CORRECT: WHERE LastModifiedDate = 2025-05-28T00:00:00Z
|
|
154
|
+
- double/int: Numeric fields
|
|
155
|
+
CORRECT: WHERE NumberOfEmployees > 100
|
|
156
|
+
CORRECT: WHERE NumberofLocations__c >= 5.5
|
|
157
|
+
- string/textarea: Text fields, use single quotes
|
|
158
|
+
CORRECT: WHERE Name = 'Acme Corporation'
|
|
159
|
+
CORRECT: WHERE Description = 'Leading tech company'
|
|
160
|
+
- phone/url/email: Specialized string fields, treat as strings
|
|
161
|
+
CORRECT: WHERE Phone = '555-1234'
|
|
162
|
+
CORRECT: WHERE Website = 'https://example.com'
|
|
163
|
+
|
|
164
|
+
**COMMON MISTAKES TO AVOID:**
|
|
165
|
+
- Using SELECT * (not allowed)
|
|
166
|
+
WRONG: SELECT * FROM Account
|
|
167
|
+
RIGHT: SELECT Id, Name, Industry FROM Account
|
|
168
|
+
- Quoting date literals (dates must be unquoted)
|
|
169
|
+
WRONG: WHERE CreatedDate >= '2025-01-01'
|
|
170
|
+
RIGHT: WHERE CreatedDate >= 2025-01-01
|
|
171
|
+
- Using SQL JOIN syntax (not supported)
|
|
172
|
+
WRONG: SELECT Account.Name FROM Account JOIN Contact ON Account.Id = Contact.AccountId
|
|
173
|
+
RIGHT: Use relationship traversal (not applicable in this use case)
|
|
174
|
+
- Using BETWEEN operator (not supported)
|
|
175
|
+
WRONG: WHERE CreatedDate BETWEEN 2025-01-01 AND 2025-12-31
|
|
176
|
+
RIGHT: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31
|
|
177
|
+
- Using uppercase TRUE/FALSE/NULL (must be lowercase)
|
|
178
|
+
WRONG: WHERE Active__c = TRUE
|
|
179
|
+
RIGHT: WHERE Active__c = true
|
|
180
|
+
- Using underscore _ in LIKE patterns (only % supported)
|
|
181
|
+
WRONG: WHERE Name LIKE 'A_me%'
|
|
182
|
+
RIGHT: WHERE Name LIKE 'A%me%'
|
|
183
|
+
- Mixing aggregate and non-aggregate fields without GROUP BY
|
|
184
|
+
WRONG: SELECT Name, COUNT(Id) FROM Account
|
|
185
|
+
RIGHT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
|
|
186
|
+
|
|
187
|
+
**EXAMPLE QUERIES:**
|
|
188
|
+
- Basic selection: SELECT Id, Name, Industry FROM Account WHERE Industry = 'Technology'
|
|
189
|
+
- Date filtering: SELECT Id, Name FROM Account WHERE CreatedDate >= 2025-01-01
|
|
190
|
+
- Multiple conditions: SELECT Id, Name FROM Account WHERE Name LIKE '%Corp%' AND Industry IN ('Technology', 'Finance')
|
|
191
|
+
- Aggregation: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
|
|
192
|
+
- Boolean and numeric: SELECT Id, Name FROM Account WHERE Active__c = true AND NumberOfEmployees > 100
|
|
193
|
+
- Date functions: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025
|
|
194
|
+
- Null checks: SELECT Id, Name FROM Account WHERE ParentId = null
|
|
195
|
+
- Multi-select picklist: SELECT Id, Name FROM Account WHERE Services__c INCLUDES ('Consulting;Support')
|
|
196
|
+
- Sorting and limiting: SELECT Id, Name FROM Account ORDER BY Name ASC LIMIT 50
|
|
197
|
+
|
|
198
|
+
***EXECUTION INSTRUCTIONS. IMPORTANT!***
|
|
199
|
+
After generating the core SOQL (and nothing else), always make sure you wrap it exactly as:
|
|
200
|
+
|
|
201
|
+
SELECT *
|
|
202
|
+
FROM {integration_name}(
|
|
203
|
+
/* your generated SOQL goes here, without a trailing semicolon */
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
Return only that wrapper call.
|
|
207
|
+
|
|
208
|
+
"""
|