MindsDB 25.7.3.0__py3-none-any.whl → 25.7.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (61)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/common/server/server.py +16 -6
  3. mindsdb/api/executor/command_executor.py +206 -135
  4. mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
  5. mindsdb/api/executor/planner/plan_join.py +3 -0
  6. mindsdb/api/executor/planner/plan_join_ts.py +117 -100
  7. mindsdb/api/executor/planner/query_planner.py +1 -0
  8. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
  9. mindsdb/api/http/initialize.py +16 -43
  10. mindsdb/api/http/namespaces/agents.py +23 -20
  11. mindsdb/api/http/namespaces/chatbots.py +83 -120
  12. mindsdb/api/http/namespaces/file.py +1 -1
  13. mindsdb/api/http/namespaces/jobs.py +38 -60
  14. mindsdb/api/http/namespaces/tree.py +69 -61
  15. mindsdb/api/mcp/start.py +2 -0
  16. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
  17. mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
  18. mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
  19. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
  20. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
  21. mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
  22. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
  23. mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
  24. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -76
  25. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
  27. mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
  28. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
  29. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
  30. mindsdb/integrations/handlers/salesforce_handler/constants.py +208 -0
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
  32. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
  33. mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
  34. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
  35. mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
  36. mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
  37. mindsdb/integrations/utilities/handler_utils.py +32 -12
  38. mindsdb/interfaces/agents/agents_controller.py +167 -108
  39. mindsdb/interfaces/agents/langchain_agent.py +10 -3
  40. mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
  41. mindsdb/interfaces/database/database.py +38 -13
  42. mindsdb/interfaces/database/integrations.py +20 -5
  43. mindsdb/interfaces/database/projects.py +63 -16
  44. mindsdb/interfaces/database/views.py +86 -60
  45. mindsdb/interfaces/jobs/jobs_controller.py +103 -110
  46. mindsdb/interfaces/knowledge_base/controller.py +26 -5
  47. mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
  48. mindsdb/interfaces/knowledge_base/executor.py +24 -0
  49. mindsdb/interfaces/query_context/context_controller.py +100 -133
  50. mindsdb/interfaces/skills/skills_controller.py +18 -6
  51. mindsdb/interfaces/storage/db.py +40 -6
  52. mindsdb/interfaces/variables/variables_controller.py +8 -15
  53. mindsdb/utilities/config.py +3 -3
  54. mindsdb/utilities/functions.py +72 -60
  55. mindsdb/utilities/log.py +38 -6
  56. mindsdb/utilities/ps.py +7 -7
  57. {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/METADATA +246 -247
  58. {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/RECORD +61 -60
  59. {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/WHEEL +0 -0
  60. {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/licenses/LICENSE +0 -0
  61. {mindsdb-25.7.3.0.dist-info → mindsdb-25.7.4.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py
@@ -5,7 +5,19 @@ from urllib.parse import urlparse
5
5
 
6
6
  import pandas as pd
7
7
  import psycopg
8
- from mindsdb_sql_parser.ast import Parameter, Identifier, Update, BinaryOperation
8
+ from mindsdb_sql_parser.ast import (
9
+ Parameter,
10
+ Identifier,
11
+ BinaryOperation,
12
+ Tuple as AstTuple,
13
+ Constant,
14
+ Select,
15
+ OrderBy,
16
+ TypeCast,
17
+ Delete,
18
+ Update,
19
+ Function,
20
+ )
9
21
  from pgvector.psycopg import register_vector
10
22
 
11
23
  from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -17,6 +29,7 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
17
29
  VectorStoreHandler,
18
30
  DistanceFunction,
19
31
  TableField,
32
+ FilterOperator,
20
33
  )
21
34
  from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase
22
35
  from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs
@@ -169,31 +182,42 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
169
182
  embedding_condition = None
170
183
 
171
184
  for condition in conditions:
185
+ is_embedding = condition.column == "embeddings"
186
+
172
187
  parts = condition.column.split(".")
173
- key = parts[0]
188
+ key = Identifier(parts[0])
189
+
174
190
  # converts 'col.el1.el2' to col->'el1'->>'el2'
175
191
  if len(parts) > 1:
176
192
  # intermediate elements
177
193
  for el in parts[1:-1]:
178
- key += f" -> '{el}'"
194
+ key = BinaryOperation(op="->", args=[key, Constant(el)])
179
195
 
180
196
  # last element
181
- key += f" ->> '{parts[-1]}'"
197
+ key = BinaryOperation(op="->>", args=[key, Constant(parts[-1])])
182
198
 
183
199
  type_cast = None
184
- if isinstance(condition.value, int):
200
+ value = condition.value
201
+ if (
202
+ isinstance(value, list)
203
+ and len(value) > 0
204
+ and condition.op in (FilterOperator.IN, FilterOperator.NOT_IN)
205
+ ):
206
+ value = condition.value[0]
207
+
208
+ if isinstance(value, int):
185
209
  type_cast = "int"
186
- elif isinstance(condition.value, float):
210
+ elif isinstance(value, float):
187
211
  type_cast = "float"
188
212
  if type_cast is not None:
189
- key = f"({key})::{type_cast}"
213
+ key = TypeCast(type_cast, key)
190
214
 
191
215
  item = {
192
216
  "name": key,
193
217
  "op": condition.op.value,
194
218
  "value": condition.value,
195
219
  }
196
- if key == "embeddings":
220
+ if is_embedding:
197
221
  embedding_condition = item
198
222
  else:
199
223
  filter_conditions.append(item)
@@ -205,64 +229,24 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
205
229
  """
206
230
  Construct where clauses from filter conditions
207
231
  """
208
- if filter_conditions is None:
209
- return ""
210
232
 
211
- where_clauses = []
233
+ where_clause = None
212
234
 
213
235
  for item in filter_conditions:
214
236
  key = item["name"]
215
237
 
216
238
  if item["op"].lower() in ("in", "not in"):
217
- values = list(repr(i) for i in item["value"])
218
- item["value"] = "({})".format(", ".join(values))
239
+ values = [Constant(i) for i in item["value"]]
240
+ value = AstTuple(values)
219
241
  else:
220
- if item["value"] is None:
221
- item["value"] = "null"
222
- else:
223
- item["value"] = repr(item["value"])
224
- where_clauses.append(f"{key} {item['op']} {item['value']}")
225
-
226
- if len(where_clauses) > 1:
227
- return f"WHERE {' AND '.join(where_clauses)}"
228
- elif len(where_clauses) == 1:
229
- return f"WHERE {where_clauses[0]}"
230
- else:
231
- return ""
232
-
233
- @staticmethod
234
- def _construct_where_clause_with_keywords(filter_conditions=None, keyword_query=None, content_column_name=None):
235
- if not keyword_query or not content_column_name:
236
- return PgVectorHandler._construct_where_clause(filter_conditions)
237
-
238
- keyword_query_condition = (
239
- f"""to_tsvector('english', {content_column_name}) @@ websearch_to_tsquery('english', '{keyword_query}')"""
240
- )
241
- if filter_conditions is None:
242
- return ""
243
-
244
- where_clauses = []
245
-
246
- for item in filter_conditions:
247
- key = item["name"]
242
+ value = Constant(item["value"])
243
+ condition = BinaryOperation(op=item["op"], args=[key, value])
248
244
 
249
- if item["op"].lower() in ("in", "not in"):
250
- values = list(repr(i) for i in item["value"])
251
- item["value"] = "({})".format(", ".join(values))
245
+ if where_clause is None:
246
+ where_clause = condition
252
247
  else:
253
- if item["value"] is None:
254
- item["value"] = "null"
255
- else:
256
- item["value"] = repr(item["value"])
257
- where_clauses.append(f"{key} {item['op']} {item['value']}")
258
-
259
- where_clauses.append(keyword_query_condition)
260
- if len(where_clauses) > 1:
261
- return f"WHERE {' AND '.join(where_clauses)}"
262
- elif len(where_clauses) == 1:
263
- return f"WHERE {where_clauses[0]}"
264
- else:
265
- return ""
248
+ where_clause = BinaryOperation(op="AND", args=[where_clause, condition])
249
+ return where_clause
266
250
 
267
251
  @staticmethod
268
252
  def _construct_full_after_from_clause(
@@ -275,9 +259,8 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
275
259
  def _build_keyword_bm25_query(
276
260
  self,
277
261
  table_name: str,
278
- query: str,
262
+ keyword_search_args: KeywordSearchArgs,
279
263
  columns: List[str] = None,
280
- content_column_name: str = "content",
281
264
  conditions: List[FilterCondition] = None,
282
265
  limit: int = None,
283
266
  offset: int = None,
@@ -286,21 +269,44 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
286
269
  columns = ["id", "content", "metadata"]
287
270
 
288
271
  filter_conditions, _ = self._translate_conditions(conditions)
272
+ where_clause = self._construct_where_clause(filter_conditions)
289
273
 
290
- # given filter conditions, construct where clause
291
- where_clause = self._construct_where_clause_with_keywords(filter_conditions, query, content_column_name)
292
-
293
- query = f"""
294
- SELECT
295
- {", ".join(columns)},
296
- ts_rank_cd(to_tsvector('english', {content_column_name}), websearch_to_tsquery('english', '{query}')) as distance
297
- FROM
298
- {table_name}
299
- {where_clause if where_clause else ""}
300
- {f"LIMIT {limit}" if limit else ""}
301
- {f"OFFSET {offset}" if offset else ""};"""
274
+ if keyword_search_args:
275
+ keyword_query_condition = BinaryOperation(
276
+ op="@@",
277
+ args=[
278
+ Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]),
279
+ Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]),
280
+ ],
281
+ )
302
282
 
303
- return query
283
+ if where_clause:
284
+ where_clause = BinaryOperation(op="AND", args=[where_clause, keyword_query_condition])
285
+ else:
286
+ where_clause = keyword_query_condition
287
+
288
+ distance = Function(
289
+ "ts_rank_cd",
290
+ args=[
291
+ Function("to_tsvector", args=[Constant("english"), Identifier(keyword_search_args.column)]),
292
+ Function("websearch_to_tsquery", args=[Constant("english"), Constant(keyword_search_args.query)]),
293
+ ],
294
+ alias=Identifier("distance"),
295
+ )
296
+
297
+ targets = [Identifier(col) for col in columns]
298
+ targets.append(distance)
299
+
300
+ limit_clause = Constant(limit) if limit else None
301
+ offset_clause = Constant(offset) if offset else None
302
+
303
+ return Select(
304
+ targets=targets,
305
+ from_table=Identifier(table_name),
306
+ where=where_clause,
307
+ limit=limit_clause,
308
+ offset=offset_clause,
309
+ )
304
310
 
305
311
  def _build_select_query(
306
312
  self,
@@ -309,12 +315,12 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
309
315
  conditions: List[FilterCondition] = None,
310
316
  limit: int = None,
311
317
  offset: int = None,
312
- ) -> str:
318
+ ) -> Select:
313
319
  """
314
320
  given inputs, build string query
315
321
  """
316
- limit_clause = f"LIMIT {limit}" if limit else ""
317
- offset_clause = f"OFFSET {offset}" if offset else ""
322
+ limit_clause = Constant(limit) if limit else None
323
+ offset_clause = Constant(offset) if offset else None
318
324
 
319
325
  # translate filter conditions to dictionary
320
326
  filter_conditions, embedding_search = self._translate_conditions(conditions)
@@ -335,7 +341,15 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
335
341
  modified_columns = ["id", "content", "embeddings", "metadata"]
336
342
  has_distance = True
337
343
 
338
- targets = ", ".join(modified_columns)
344
+ targets = [Identifier(col) for col in modified_columns]
345
+
346
+ query = Select(
347
+ targets=targets,
348
+ from_table=Identifier(table_name),
349
+ where=where_clause,
350
+ limit=limit_clause,
351
+ offset=offset_clause,
352
+ )
339
353
 
340
354
  if embedding_search:
341
355
  search_vector = embedding_search["value"]
@@ -352,15 +366,18 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
352
366
  if isinstance(search_vector, list):
353
367
  search_vector = f"[{','.join(str(x) for x in search_vector)}]"
354
368
 
369
+ vector_op = BinaryOperation(
370
+ op=self.distance_op,
371
+ args=[Identifier("embeddings"), Constant(search_vector)],
372
+ alias=Identifier("distance"),
373
+ )
355
374
  # Calculate distance as part of the query if needed
356
375
  if has_distance:
357
- targets = f"{targets}, (embeddings {self.distance_op} '{search_vector}') as distance"
376
+ query.targets.append(vector_op)
358
377
 
359
- return f"SELECT {targets} FROM {table_name} {where_clause} ORDER BY embeddings {self.distance_op} '{search_vector}' ASC {limit_clause} {offset_clause} "
378
+ query.order_by = [OrderBy(vector_op, direction="ASC")]
360
379
 
361
- else:
362
- # if filter conditions, return rows that satisfy the conditions
363
- return f"SELECT {targets} FROM {table_name} {where_clause} {limit_clause} {offset_clause}"
380
+ return query
364
381
 
365
382
  def _check_table(self, table_name: str):
366
383
  # Apply namespace for a user
@@ -386,8 +403,8 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
386
403
  columns = ["id", "content", "embeddings", "metadata"]
387
404
 
388
405
  query = self._build_select_query(table_name, columns, conditions, limit, offset)
389
-
390
- result = self.raw_query(query)
406
+ query_str = self.renderer.get_string(query, with_failback=True)
407
+ result = self.raw_query(query_str)
391
408
 
392
409
  # ensure embeddings are returned as string so they can be parsed by mindsdb
393
410
  if "embeddings" in columns:
@@ -408,12 +425,10 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
408
425
 
409
426
  if columns is None:
410
427
  columns = ["id", "content", "embeddings", "metadata"]
411
- content_column_name = keyword_search_args.column
412
- query = self._build_keyword_bm25_query(
413
- table_name, keyword_search_args.query, columns, content_column_name, conditions, limit, offset
414
- )
415
428
 
416
- result = self.raw_query(query)
429
+ query = self._build_keyword_bm25_query(table_name, keyword_search_args, columns, conditions, limit, offset)
430
+ query_str = self.renderer.get_string(query, with_failback=True)
431
+ result = self.raw_query(query_str)
417
432
 
418
433
  # ensure embeddings are returned as string so they can be parsed by mindsdb
419
434
  if "embeddings" in columns:
@@ -622,8 +637,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
622
637
  filter_conditions, _ = self._translate_conditions(conditions)
623
638
  where_clause = self._construct_where_clause(filter_conditions)
624
639
 
625
- query = f"DELETE FROM {table_name} {where_clause}"
626
- self.raw_query(query)
640
+ query = Delete(table=Identifier(table_name), where=where_clause)
641
+ query_str = self.renderer.get_string(query, with_failback=True)
642
+ self.raw_query(query_str)
627
643
 
628
644
  def drop_table(self, table_name: str, if_exists=True):
629
645
  """
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py
@@ -1,5 +1,3 @@
1
- import csv
2
- import io
3
1
  import time
4
2
  import json
5
3
  from typing import Optional, Any
@@ -625,7 +623,7 @@ class PostgresHandler(MetaDatabaseHandler):
625
623
  result = self.native_query(query)
626
624
  return result
627
625
 
628
- def meta_get_column_statistics(self, table_names: Optional[list] = None) -> dict:
626
+ def meta_get_column_statistics(self, table_names: Optional[list] = None) -> Response:
629
627
  """
630
628
  Retrieves column statistics (e.g., most common values, frequencies, null percentage, and distinct value count)
631
629
  for the specified tables or all tables if no list is provided.
@@ -634,54 +632,58 @@ class PostgresHandler(MetaDatabaseHandler):
634
632
  table_names (list): A list of table names for which to retrieve column statistics.
635
633
 
636
634
  Returns:
637
- dict: A dictionary containing the column statistics.
635
+ Response: A response object containing the column statistics.
638
636
  """
639
- query = """
637
+ table_filter = ""
638
+ if table_names is not None and len(table_names) > 0:
639
+ quoted_names = [f"'{t}'" for t in table_names]
640
+ table_filter = f" AND ps.tablename IN ({','.join(quoted_names)})"
641
+
642
+ query = (
643
+ """
640
644
  SELECT
641
- ps.attname AS column_name,
642
- ps.tablename AS table_name,
643
- ps.most_common_vals AS most_common_values,
644
- ps.most_common_freqs::text AS most_common_frequencies,
645
- ps.null_frac * 100 AS null_percentage,
646
- ps.n_distinct AS distinct_values_count,
647
- ps.histogram_bounds AS histogram_bounds
645
+ ps.tablename AS TABLE_NAME,
646
+ ps.attname AS COLUMN_NAME,
647
+ ROUND(ps.null_frac::numeric * 100, 2) AS NULL_PERCENTAGE,
648
+ CASE
649
+ WHEN ps.n_distinct < 0 THEN NULL
650
+ ELSE ps.n_distinct::bigint
651
+ END AS DISTINCT_VALUES_COUNT,
652
+ ps.most_common_vals AS MOST_COMMON_VALUES,
653
+ ps.most_common_freqs AS MOST_COMMON_FREQUENCIES,
654
+ ps.histogram_bounds
648
655
  FROM pg_stats ps
649
656
  WHERE ps.schemaname = current_schema()
650
657
  AND ps.tablename NOT LIKE 'pg_%'
651
658
  AND ps.tablename NOT LIKE 'sql_%'
652
659
  """
653
-
654
- if table_names is not None and len(table_names) > 0:
655
- table_names = [f"'{t}'" for t in table_names]
656
- query += f" AND ps.tablename IN ({','.join(table_names)})"
660
+ + table_filter
661
+ + """
662
+ ORDER BY ps.tablename, ps.attname
663
+ """
664
+ )
657
665
 
658
666
  result = self.native_query(query)
659
- df = result.data_frame
660
667
 
661
- def parse_pg_array_string(x):
662
- try:
663
- return (
664
- [item.strip(" ,") for row in csv.reader(io.StringIO(x.strip("{}"))) for item in row if item.strip()]
665
- if x
666
- else []
667
- )
668
- except IndexError:
669
- logger.error(f"Error parsing PostgreSQL array string: {x}")
670
- return []
671
-
672
- # Convert most_common_values and most_common_frequencies from string representation to lists.
673
- df["most_common_values"] = df["most_common_values"].apply(lambda x: parse_pg_array_string(x))
674
- df["most_common_frequencies"] = df["most_common_frequencies"].apply(lambda x: parse_pg_array_string(x))
675
-
676
- # Get the minimum and maximum values from the histogram bounds.
677
- df["minimum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[0] if x else None)
678
- df["maximum_value"] = df["histogram_bounds"].apply(lambda x: parse_pg_array_string(x)[-1] if x else None)
679
-
680
- # Handle cases where distinct_values_count is negative (indicating an approximation).
681
- df["distinct_values_count"] = df["distinct_values_count"].apply(lambda x: x if x >= 0 else None)
668
+ if result.type == RESPONSE_TYPE.TABLE and result.data_frame is not None:
669
+ df = result.data_frame
682
670
 
683
- result.data_frame = df.drop(columns=["histogram_bounds"])
671
+ # Extract min/max from histogram bounds
672
+ def extract_min_max(histogram_str):
673
+ if histogram_str and str(histogram_str) != "nan":
674
+ clean = str(histogram_str).strip("{}")
675
+ if clean:
676
+ values = clean.split(",")
677
+ min_val = values[0].strip(" \"'") if values else None
678
+ max_val = values[-1].strip(" \"'") if values else None
679
+ return min_val, max_val
680
+ return None, None
684
681
 
682
+ min_max_values = df["histogram_bounds"].apply(extract_min_max)
683
+ df["MINIMUM_VALUE"] = min_max_values.apply(lambda x: x[0])
684
+ df["MAXIMUM_VALUE"] = min_max_values.apply(lambda x: x[1])
685
+
686
+ result.data_frame = df.drop(columns=["histogram_bounds"])
685
687
  return result
686
688
 
687
689
  def meta_get_primary_keys(self, table_names: Optional[list] = None) -> Response:
mindsdb/integrations/handlers/salesforce_handler/constants.py
@@ -0,0 +1,208 @@
1
+ """
2
+ Constants for Salesforce handler.
3
+ """
4
+
5
+
6
+ def get_soql_instructions(integration_name):
7
+ return f"""This handler executes SOQL (Salesforce Object Query Language), NOT SQL! Follow these rules strictly:
8
+
9
+ **BASIC STRUCTURE:**
10
+ - NO "SELECT *" - must explicitly list all fields
11
+ SQL: SELECT * FROM Account;
12
+ SOQL: SELECT Id, Name, Industry FROM Account
13
+ - NO table aliases - use full table names only
14
+ SQL: SELECT a.Name FROM Account a;
15
+ SOQL: SELECT Name FROM Account
16
+ - NO column aliases - field names cannot be aliased
17
+ SQL: SELECT Name AS CompanyName FROM Account;
18
+ SOQL: SELECT Name FROM Account
19
+ - NO DISTINCT keyword - not supported in SOQL
20
+ SQL: SELECT DISTINCT Industry FROM Account;
21
+ SOQL: Not possible - use separate logic
22
+ - NO subqueries in FROM clause - only relationship-based subqueries allowed
23
+ SQL: SELECT * FROM (SELECT Name FROM Account) AS AccountNames;
24
+ SOQL: Not supported
25
+
26
+ **FIELD SELECTION:**
27
+ - Always include Id field when querying
28
+ CORRECT: SELECT Id, Name, Industry FROM Account
29
+ INCORRECT: SELECT Name, Industry FROM Account
30
+ - Field names are case-sensitive
31
+ CORRECT: SELECT CreatedDate FROM Account
32
+ INCORRECT: SELECT createddate FROM Account
33
+ - Use exact field names from the data catalog
34
+ CORRECT: SELECT CustomerPriority__c FROM Account
35
+ INCORRECT: SELECT customer_priority FROM Account
36
+
37
+ **FILTERING (WHERE clause):**
38
+ - Date/DateTime fields: Use unquoted literals in YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ format
39
+ CORRECT: WHERE CloseDate >= 2025-05-28
40
+ CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z
41
+ INCORRECT: WHERE CloseDate >= '2025-05-28'
42
+ INCORRECT: WHERE CreatedDate >= "2025-05-28"
43
+ - Special date literals: TODAY, YESTERDAY, LAST_WEEK, LAST_MONTH, LAST_QUARTER, LAST_YEAR, THIS_WEEK, THIS_MONTH, THIS_QUARTER, THIS_YEAR
44
+ CORRECT: WHERE CreatedDate = TODAY
45
+ CORRECT: WHERE LastModifiedDate >= LAST_MONTH
46
+ CORRECT: WHERE CloseDate = THIS_QUARTER
47
+ - LIKE operator: Only supports % wildcard, NO underscore (_) wildcard
48
+ CORRECT: WHERE Name LIKE '%Corp%'
49
+ CORRECT: WHERE Name LIKE 'Acme%'
50
+ INCORRECT: WHERE Name LIKE 'A_me%'
51
+ - BETWEEN operator: NOT supported, use >= AND <= instead
52
+ SQL: WHERE CreatedDate BETWEEN '2025-01-01' AND '2025-12-31'
53
+ SOQL: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31
54
+ - Boolean values: Use lowercase true/false, NOT TRUE/FALSE
55
+ CORRECT: WHERE Active__c = true
56
+ CORRECT: WHERE IsDeleted = false
57
+ INCORRECT: WHERE Active__c = TRUE
58
+ INCORRECT: WHERE IsDeleted = FALSE
59
+ - NULL values: Use lowercase null, NOT NULL
60
+ CORRECT: WHERE ParentId = null
61
+ CORRECT: WHERE Description != null
62
+ INCORRECT: WHERE ParentId IS NULL
63
+ INCORRECT: WHERE Description IS NOT NULL
64
+ - String values: Use single quotes for strings
65
+ CORRECT: WHERE Industry = 'Technology'
66
+ CORRECT: WHERE Name = 'Acme Corp'
67
+ INCORRECT: WHERE Industry = "Technology"
68
+ - Multi-select picklist fields: Use INCLUDES('value1;value2') or EXCLUDES('value1;value2')
69
+ CORRECT: WHERE Services__c INCLUDES ('Consulting;Support')
70
+ CORRECT: WHERE Services__c EXCLUDES ('Training')
71
+ INCORRECT: WHERE Services__c = 'Consulting'
72
+
73
+ **JOINS:**
74
+ - NO explicit JOIN syntax supported
75
+ SQL: SELECT a.Name, c.FirstName FROM Account a JOIN Contact c ON a.Id = c.AccountId
76
+ SOQL: Not supported - use relationship traversal (not applicable in this use case)
77
+
78
+ **AGGREGATES:**
79
+ - NO COUNT(*) - use COUNT(Id) instead
80
+ SQL: SELECT COUNT(*) FROM Account
81
+ SOQL: SELECT COUNT(Id) FROM Account
82
+ - Cannot mix aggregate functions with non-aggregate fields unless using GROUP BY
83
+ CORRECT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
84
+ CORRECT: SELECT COUNT(Id) FROM Account
85
+ INCORRECT: SELECT Industry, Name, COUNT(Id) FROM Account
86
+ - NO GROUP_CONCAT or string aggregation functions
87
+ SQL: SELECT GROUP_CONCAT(Name) FROM Account
88
+ SOQL: Not supported
89
+ - NO HAVING clause
90
+ SQL: SELECT Industry, COUNT(*) FROM Account GROUP BY Industry HAVING COUNT(*) > 5
91
+ SOQL: Not supported - filter with separate logic
92
+ - GROUP BY has limited field type support
93
+ CORRECT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
94
+ INCORRECT: SELECT Description, COUNT(Id) FROM Account GROUP BY Description (textarea fields not supported)
95
+
96
+ **FUNCTIONS:**
97
+ - Date functions: CALENDAR_MONTH(), CALENDAR_YEAR(), CALENDAR_QUARTER(), DAY_IN_MONTH(), DAY_IN_WEEK(), DAY_IN_YEAR(), HOUR_IN_DAY(), WEEK_IN_MONTH(), WEEK_IN_YEAR()
98
+ CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025
99
+ CORRECT: SELECT Id, Name FROM Account WHERE CALENDAR_MONTH(CreatedDate) = 5
100
+ CORRECT: SELECT Id, Name FROM Account WHERE DAY_IN_WEEK(CreatedDate) = 2
101
+ - NO math functions: ROUND, FLOOR, CEILING, ABS, etc.
102
+ SQL: SELECT ROUND(AnnualRevenue, 2) FROM Account
103
+ SOQL: Not supported
104
+ - NO conditional functions: CASE WHEN, COALESCE, NULLIF, etc.
105
+ SQL: SELECT CASE WHEN Industry = 'Technology' THEN 'Tech' ELSE 'Other' END FROM Account
106
+ SOQL: Not supported
107
+ - NO string functions except INCLUDES/EXCLUDES for multi-select picklists
108
+ SQL: SELECT UPPER(Name) FROM Account
109
+ SOQL: Not supported
110
+
111
+ **OPERATORS:**
112
+ - Supported: =, !=, <, >, <=, >=, LIKE, IN, NOT IN, INCLUDES, EXCLUDES
113
+ CORRECT: WHERE Industry = 'Technology'
114
+ CORRECT: WHERE AnnualRevenue >= 1000000
115
+ CORRECT: WHERE Industry IN ('Technology', 'Finance')
116
+ CORRECT: WHERE Industry NOT IN ('Government', 'Non-Profit')
117
+ CORRECT: WHERE Services__c INCLUDES ('Consulting')
118
+ - NOT supported: REGEXP, BETWEEN, EXISTS, NOT EXISTS
119
+ SQL: WHERE Name REGEXP '^[A-Z]'
120
+ SOQL: Not supported
121
+
122
+ **SORTING & LIMITING:**
123
+ - ORDER BY: Fully supported
124
+ CORRECT: SELECT Id, Name FROM Account ORDER BY Name ASC
125
+ CORRECT: SELECT Id, Name FROM Account ORDER BY CreatedDate DESC, Name ASC
126
+ CORRECT: SELECT Id, Name FROM Account ORDER BY Name NULLS LAST
127
+ - LIMIT: Maximum 2000 records, use smaller limits for better performance
128
+ CORRECT: SELECT Id, Name FROM Account LIMIT 100
129
+ CORRECT: SELECT Id, Name FROM Account LIMIT 2000
130
+ INCORRECT: SELECT Id, Name FROM Account LIMIT 5000
131
+ - NO OFFSET: Not supported for pagination
132
+ SQL: SELECT Id, Name FROM Account LIMIT 10 OFFSET 20
133
+ SOQL: Not supported
134
+
135
+ **DATA TYPES:**
136
+ - picklist: Single-select dropdown, use = operator with string values
137
+ CORRECT: WHERE Industry = 'Technology'
138
+ CORRECT: WHERE Rating = 'Hot'
139
+ - reference: Foreign key field, typically ends with Id
140
+ CORRECT: WHERE OwnerId = '00530000003OOwn'
141
+ CORRECT: WHERE AccountId = '0013000000UzXyz'
142
+ - boolean: Use lowercase true/false
143
+ CORRECT: WHERE IsDeleted = false
144
+ CORRECT: WHERE Active__c = true
145
+ - currency: Numeric field for money values
146
+ CORRECT: WHERE AnnualRevenue > 1000000
147
+ CORRECT: WHERE AnnualRevenue >= 500000.50
148
+ - date: Date only, use YYYY-MM-DD format
149
+ CORRECT: WHERE LastActivityDate = 2025-05-28
150
+ CORRECT: WHERE SLAExpirationDate__c >= 2025-01-01
151
+ - datetime: Date and time, use YYYY-MM-DDThh:mm:ssZ format
152
+ CORRECT: WHERE CreatedDate >= 2025-05-28T10:30:00Z
153
+ CORRECT: WHERE LastModifiedDate = 2025-05-28T00:00:00Z
154
+ - double/int: Numeric fields
155
+ CORRECT: WHERE NumberOfEmployees > 100
156
+ CORRECT: WHERE NumberofLocations__c >= 5.5
157
+ - string/textarea: Text fields, use single quotes
158
+ CORRECT: WHERE Name = 'Acme Corporation'
159
+ CORRECT: WHERE Description = 'Leading tech company'
160
+ - phone/url/email: Specialized string fields, treat as strings
161
+ CORRECT: WHERE Phone = '555-1234'
162
+ CORRECT: WHERE Website = 'https://example.com'
163
+
164
+ **COMMON MISTAKES TO AVOID:**
165
+ - Using SELECT * (not allowed)
166
+ WRONG: SELECT * FROM Account
167
+ RIGHT: SELECT Id, Name, Industry FROM Account
168
+ - Quoting date literals (dates must be unquoted)
169
+ WRONG: WHERE CreatedDate >= '2025-01-01'
170
+ RIGHT: WHERE CreatedDate >= 2025-01-01
171
+ - Using SQL JOIN syntax (not supported)
172
+ WRONG: SELECT Account.Name FROM Account JOIN Contact ON Account.Id = Contact.AccountId
173
+ RIGHT: Use relationship traversal (not applicable in this use case)
174
+ - Using BETWEEN operator (not supported)
175
+ WRONG: WHERE CreatedDate BETWEEN 2025-01-01 AND 2025-12-31
176
+ RIGHT: WHERE CreatedDate >= 2025-01-01 AND CreatedDate <= 2025-12-31
177
+ - Using uppercase TRUE/FALSE/NULL (must be lowercase)
178
+ WRONG: WHERE Active__c = TRUE
179
+ RIGHT: WHERE Active__c = true
180
+ - Using underscore _ in LIKE patterns (only % supported)
181
+ WRONG: WHERE Name LIKE 'A_me%'
182
+ RIGHT: WHERE Name LIKE 'A%me%'
183
+ - Mixing aggregate and non-aggregate fields without GROUP BY
184
+ WRONG: SELECT Name, COUNT(Id) FROM Account
185
+ RIGHT: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
186
+
187
+ **EXAMPLE QUERIES:**
188
+ - Basic selection: SELECT Id, Name, Industry FROM Account WHERE Industry = 'Technology'
189
+ - Date filtering: SELECT Id, Name FROM Account WHERE CreatedDate >= 2025-01-01
190
+ - Multiple conditions: SELECT Id, Name FROM Account WHERE Name LIKE '%Corp%' AND Industry IN ('Technology', 'Finance')
191
+ - Aggregation: SELECT Industry, COUNT(Id) FROM Account GROUP BY Industry
192
+ - Boolean and numeric: SELECT Id, Name FROM Account WHERE Active__c = true AND NumberOfEmployees > 100
193
+ - Date functions: SELECT Id, Name FROM Account WHERE CALENDAR_YEAR(CreatedDate) = 2025
194
+ - Null checks: SELECT Id, Name FROM Account WHERE ParentId = null
195
+ - Multi-select picklist: SELECT Id, Name FROM Account WHERE Services__c INCLUDES ('Consulting;Support')
196
+ - Sorting and limiting: SELECT Id, Name FROM Account ORDER BY Name ASC LIMIT 50
197
+
198
+ ***EXECUTION INSTRUCTIONS. IMPORTANT!***
199
+ After generating the core SOQL (and nothing else), always make sure you wrap it exactly as:
200
+
201
+ SELECT *
202
+ FROM {integration_name}(
203
+ /* your generated SOQL goes here, without a trailing semicolon */
204
+ )
205
+
206
+ Return only that wrapper call.
207
+
208
+ """