MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (55)
  1. {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +254 -253
  2. {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +55 -52
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/api/executor/__init__.py +0 -1
  5. mindsdb/api/executor/command_executor.py +2 -1
  6. mindsdb/api/executor/data_types/answer.py +1 -1
  7. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
  8. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
  9. mindsdb/api/executor/sql_query/__init__.py +1 -0
  10. mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
  11. mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
  12. mindsdb/api/http/namespaces/sql.py +3 -1
  13. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
  14. mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
  15. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
  16. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
  17. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
  18. mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
  19. mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
  20. mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
  21. mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
  22. mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +9 -3
  24. mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
  25. mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
  26. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  27. mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +1 -1
  28. mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
  29. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
  30. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +30 -12
  31. mindsdb/integrations/utilities/rag/settings.py +6 -2
  32. mindsdb/interfaces/agents/agents_controller.py +3 -5
  33. mindsdb/interfaces/agents/langchain_agent.py +112 -150
  34. mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
  35. mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
  36. mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
  37. mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
  38. mindsdb/interfaces/chatbot/memory.py +58 -13
  39. mindsdb/interfaces/database/projects.py +17 -15
  40. mindsdb/interfaces/database/views.py +12 -25
  41. mindsdb/interfaces/knowledge_base/controller.py +6 -1
  42. mindsdb/interfaces/model/functions.py +15 -4
  43. mindsdb/interfaces/model/model_controller.py +4 -7
  44. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
  45. mindsdb/interfaces/skills/skill_tool.py +97 -53
  46. mindsdb/interfaces/skills/sql_agent.py +77 -36
  47. mindsdb/interfaces/storage/db.py +1 -1
  48. mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
  49. mindsdb/utilities/context.py +2 -1
  50. mindsdb/utilities/langfuse.py +264 -0
  51. mindsdb/utilities/partitioning.py +52 -0
  52. mindsdb/utilities/render/sqlalchemy_render.py +7 -1
  53. {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
  54. {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
  55. {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
1
 
2
+ from typing import Union
3
+
2
4
  from mindsdb_sql_parser.ast import Identifier, Select, BinaryOperation, Constant, OrderBy
3
5
 
4
6
  from mindsdb.interfaces.storage import db
5
-
6
-
7
7
  from .types import ChatBotMessage
8
8
 
9
9
 
@@ -60,7 +60,7 @@ class BaseMemory:
60
60
 
61
61
  # If the chat_id is a tuple, convert it to a string when storing the message in the database.
62
62
  self._add_to_history(
63
- str(chat_id) if isinstance(chat_id, tuple) else chat_id,
63
+ chat_id,
64
64
  chat_message,
65
65
  table_name=table_name
66
66
  )
@@ -74,7 +74,7 @@ class BaseMemory:
74
74
 
75
75
  else:
76
76
  history = self._get_chat_history(
77
- str(chat_id) if isinstance(chat_id, tuple) else chat_id,
77
+ chat_id,
78
78
  table_name
79
79
  )
80
80
  self._cache[key] = history
@@ -108,18 +108,44 @@ class HandlerMemory(BaseMemory):
108
108
  time_col = t_params['time_col']
109
109
  chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']]
110
110
 
111
- ast_query = Select(
112
- targets=[Identifier(text_col),
113
- Identifier(username_col),
114
- Identifier(time_col)],
115
- from_table=Identifier(t_params['name']),
116
- where=[BinaryOperation(
111
+ chat_id = chat_id if isinstance(chat_id, tuple) else (chat_id,)
112
+ # Add a WHERE clause for each chat_id column.
113
+ where_conditions = [
114
+ BinaryOperation(
117
115
  op='=',
118
116
  args=[
119
117
  Identifier(chat_id_col),
120
118
  Constant(chat_id[idx])
121
119
  ]
122
- ) for idx, chat_id_col in enumerate(chat_id_cols)],
120
+ ) for idx, chat_id_col in enumerate(chat_id_cols)
121
+ ]
122
+ # Add a WHERE clause to ignore holding messages from the bot.
123
+ from .chatbot_task import HOLDING_MESSAGE
124
+
125
+ where_conditions.append(
126
+ BinaryOperation(
127
+ op='!=',
128
+ args=[
129
+ Identifier(text_col),
130
+ Constant(HOLDING_MESSAGE)
131
+ ]
132
+ )
133
+ )
134
+
135
+ # Convert the WHERE conditions to a BinaryOperation object.
136
+ where_conditions_binary_operation = None
137
+ for condition in where_conditions:
138
+ if where_conditions_binary_operation is None:
139
+ where_conditions_binary_operation = condition
140
+ else:
141
+ where_conditions_binary_operation = BinaryOperation('and', args=[where_conditions_binary_operation, condition])
142
+
143
+ ast_query = Select(
144
+ targets=[Identifier(text_col),
145
+ Identifier(username_col),
146
+ Identifier(time_col)],
147
+ from_table=Identifier(t_params['name']),
148
+ where=where_conditions_binary_operation,
123
149
  order_by=[OrderBy(Identifier(time_col))],
124
150
  limit=Constant(self.MAX_DEPTH),
125
151
  )
@@ -151,9 +177,28 @@ class DBMemory(BaseMemory):
151
177
  uses mindsdb database to store messages
152
178
  '''
153
179
 
180
+ def _generate_chat_id_for_db(self, chat_id: Union[str, tuple], table_name: str = None) -> str:
181
+ """
182
+ Generate an ID for the chat to store in the database.
183
+ The ID is a string that includes the components of the chat ID and the table name (if provided) separated by underscores.
184
+
185
+ Args:
186
+ chat_id (str | tuple): The ID of the chat.
187
+ table_name (str): The name of the table the chat belongs to.
188
+ """
189
+ if isinstance(chat_id, tuple):
190
+ char_id_str = "_".join(str(val) for val in chat_id)
191
+ else:
192
+ char_id_str = str(chat_id)
193
+
194
+ if table_name:
195
+ chat_id_str = f"{table_name}_{char_id_str}"
196
+
197
+ return chat_id_str
198
+
154
199
  def _add_to_history(self, chat_id, message, table_name=None):
155
200
  chat_bot_id = self.chat_task.bot_id
156
- destination = str((chat_id, table_name)) if table_name else chat_id
201
+ destination = self._generate_chat_id_for_db(chat_id, table_name)
157
202
 
158
203
  message = db.ChatBotsHistory(
159
204
  chat_bot_id=chat_bot_id,
@@ -167,7 +212,7 @@ class DBMemory(BaseMemory):
167
212
 
168
213
  def _get_chat_history(self, chat_id, table_name=None):
169
214
  chat_bot_id = self.chat_task.bot_id
170
- destination = str((chat_id, table_name)) if table_name else chat_id
215
+ destination = self._generate_chat_id_for_db(chat_id, table_name)
171
216
 
172
217
  query = db.ChatBotsHistory.query\
173
218
  .filter(
@@ -24,19 +24,14 @@ class Project:
24
24
  p = Project()
25
25
  p.record = db_record
26
26
  p.name = db_record.name
27
- p.company_id = db_record.company_id
27
+ p.company_id = ctx.company_id
28
28
  p.id = db_record.id
29
29
  return p
30
30
 
31
31
  def create(self, name: str):
32
32
  name = name.lower()
33
- existing_record = db.Project.query.filter(
34
- (sa.func.lower(db.Project.name) == name)
35
- & (db.Project.company_id == ctx.company_id)
36
- & (db.Project.deleted_at == sa.null())
37
- ).first()
38
- if existing_record is not None:
39
- raise EntityExistsError('Project already exists', name)
33
+
34
+ company_id = ctx.company_id if ctx.company_id is not None else 0
40
35
 
41
36
  existing_record = db.Integration.query.filter(
42
37
  sa.func.lower(db.Integration.name) == name,
@@ -45,23 +40,28 @@ class Project:
45
40
  if existing_record is not None:
46
41
  raise EntityExistsError('Database exists with this name ', name)
47
42
 
43
+ existing_record = db.Project.query.filter(
44
+ (sa.func.lower(db.Project.name) == name)
45
+ & (db.Project.company_id == company_id)
46
+ & (db.Project.deleted_at == sa.null())
47
+ ).first()
48
+ if existing_record is not None:
49
+ raise EntityExistsError('Project already exists', name)
50
+
48
51
  record = db.Project(
49
52
  name=name,
50
- company_id=ctx.company_id
53
+ company_id=company_id
51
54
  )
52
55
 
53
56
  self.record = record
54
57
  self.name = name
55
- self.company_id = ctx.company_id
58
+ self.company_id = company_id
56
59
 
57
60
  db.session.add(record)
58
61
  db.session.commit()
59
62
 
60
63
  self.id = record.id
61
64
 
62
- def save(self):
63
- db.session.commit()
64
-
65
65
  def delete(self):
66
66
  tables = self.get_tables()
67
67
  tables = [key for key, val in tables.items() if val['type'] != 'table']
@@ -360,8 +360,9 @@ class ProjectController:
360
360
  pass
361
361
 
362
362
  def get_list(self) -> List[Project]:
363
+ company_id = ctx.company_id if ctx.company_id is not None else 0
363
364
  records = db.Project.query.filter(
364
- (db.Project.company_id == ctx.company_id)
365
+ (db.Project.company_id == company_id)
365
366
  & (db.Project.deleted_at == sa.null())
366
367
  ).order_by(db.Project.name)
367
368
 
@@ -371,7 +372,8 @@ class ProjectController:
371
372
  if id is not None and name is not None:
372
373
  raise ValueError("Both 'id' and 'name' is None")
373
374
 
374
- q = db.Project.query.filter_by(company_id=ctx.company_id)
375
+ company_id = ctx.company_id if ctx.company_id is not None else 0
376
+ q = db.Project.query.filter_by(company_id=company_id)
375
377
 
376
378
  if id is not None:
377
379
  q = q.filter_by(id=id)
@@ -3,6 +3,7 @@ from mindsdb.interfaces.storage import db
3
3
  from mindsdb.interfaces.query_context.context_controller import query_context_controller
4
4
  from mindsdb.utilities.context import context as ctx
5
5
  from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
6
+ from mindsdb.interfaces.model.functions import get_project_record, get_project_records
6
7
 
7
8
 
8
9
  class ViewController:
@@ -39,11 +40,8 @@ class ViewController:
39
40
 
40
41
  def update(self, name, query, project_name):
41
42
  name = name.lower()
42
- project_record = db.session.query(db.Project).filter_by(
43
- name=project_name,
44
- company_id=ctx.company_id,
45
- deleted_at=None
46
- ).first()
43
+ project_record = get_project_record(project_name)
44
+
47
45
  rec = db.session.query(db.View).filter(
48
46
  func.lower(db.View.name) == name,
49
47
  db.View.company_id == ctx.company_id,
@@ -56,11 +54,8 @@ class ViewController:
56
54
 
57
55
  def delete(self, name, project_name):
58
56
  name = name.lower()
59
- project_record = db.session.query(db.Project).filter_by(
60
- name=project_name,
61
- company_id=ctx.company_id,
62
- deleted_at=None
63
- ).first()
57
+ project_record = get_project_record(project_name)
58
+
64
59
  rec = db.session.query(db.View).filter(
65
60
  func.lower(db.View.name) == name,
66
61
  db.View.company_id == ctx.company_id,
@@ -74,17 +69,12 @@ class ViewController:
74
69
  query_context_controller.drop_query_context('view', rec.id)
75
70
 
76
71
  def list(self, project_name):
77
- query = db.session.query(db.Project).filter_by(
78
- company_id=ctx.company_id,
79
- deleted_at=None
80
- )
81
- if project_name is not None:
82
- query = query.filter_by(name=project_name)
83
72
 
84
- project_names = {
85
- i.id: i.name
86
- for i in query
87
- }
73
+ project_names = {}
74
+ for project in get_project_records():
75
+ if project_name is not None and project.name != project_name:
76
+ continue
77
+ project_names[project.id] = project.name
88
78
 
89
79
  query = db.session.query(db.View).filter(
90
80
  db.View.company_id == ctx.company_id,
@@ -112,11 +102,8 @@ class ViewController:
112
102
  }
113
103
 
114
104
  def get(self, id=None, name=None, project_name=None):
115
- project_record = db.session.query(db.Project).filter_by(
116
- name=project_name,
117
- company_id=ctx.company_id,
118
- deleted_at=None
119
- ).first()
105
+ project_record = get_project_record(project_name)
106
+
120
107
  if id is not None:
121
108
  records = db.session.query(db.View).filter_by(
122
109
  id=id,
@@ -52,6 +52,7 @@ class KnowledgeBaseTable:
52
52
  self.session = session
53
53
  self.document_preprocessor = None
54
54
  self.document_loader = None
55
+ self.model_params = None
55
56
 
56
57
  def configure_preprocessing(self, config: Optional[dict] = None):
57
58
  """Configure preprocessing for the knowledge base table"""
@@ -488,6 +489,7 @@ class KnowledgeBaseTable:
488
489
  df_out = project_datanode.predict(
489
490
  model_name=model_rec.name,
490
491
  df=df,
492
+ params=self.model_params
491
493
  )
492
494
 
493
495
  target = model_rec.to_predict[0]
@@ -859,16 +861,19 @@ class KnowledgeBaseController:
859
861
  )
860
862
  return kb
861
863
 
862
- def get_table(self, name: str, project_id: int) -> KnowledgeBaseTable:
864
+ def get_table(self, name: str, project_id: int, params: dict = None) -> KnowledgeBaseTable:
863
865
  """
864
866
  Returns kb table object with properly configured preprocessing
865
867
  :param name: table name
866
868
  :param project_id: project id
869
+ :param params: runtime parameters for KB. Keys: 'model' - parameters for embedding model
867
870
  :return: kb table object
868
871
  """
869
872
  kb = self.get(name, project_id)
870
873
  if kb is not None:
871
874
  table = KnowledgeBaseTable(kb, self.session)
875
+ if params:
876
+ table.model_params = params.get('model')
872
877
 
873
878
  # Always configure preprocessing - either from params or default
874
879
  if kb.params and 'preprocessing' in kb.params:
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, List
2
2
 
3
3
  from sqlalchemy import null, func
4
4
 
@@ -41,9 +41,7 @@ def get_integration_record(name: str) -> db.Integration:
41
41
 
42
42
  @profiler.profile()
43
43
  def get_project_record(name: str) -> db.Project:
44
- company_id = ctx.company_id
45
- if company_id is None:
46
- company_id = null()
44
+ company_id = ctx.company_id if ctx.company_id is not None else 0
47
45
 
48
46
  project_record = (
49
47
  db.session.query(db.Project)
@@ -56,6 +54,19 @@ def get_project_record(name: str) -> db.Project:
56
54
  return project_record
57
55
 
58
56
 
57
+ @profiler.profile()
58
+ def get_project_records() -> List[db.Project]:
59
+ company_id = ctx.company_id if ctx.company_id is not None else 0
60
+
61
+ return (
62
+ db.session.query(db.Project)
63
+ .filter(
64
+ (db.Project.company_id == company_id)
65
+ & (db.Project.deleted_at == null())
66
+ ).all()
67
+ )
68
+
69
+
59
70
  @profiler.profile()
60
71
  def get_predictor_integration(record: db.Predictor) -> db.Integration:
61
72
  integration_record = (
@@ -7,14 +7,15 @@ from multiprocessing.pool import ThreadPool
7
7
  import pandas as pd
8
8
  from dateutil.parser import parse as parse_datetime
9
9
 
10
- from sqlalchemy import func, null
10
+ from sqlalchemy import func
11
11
  import numpy as np
12
12
 
13
13
  import mindsdb.interfaces.storage.db as db
14
14
  from mindsdb.utilities.config import Config
15
15
  from mindsdb.interfaces.model.functions import (
16
16
  get_model_record,
17
- get_model_records
17
+ get_model_records,
18
+ get_project_record
18
19
  )
19
20
  from mindsdb.interfaces.storage.json import get_json_storage
20
21
  from mindsdb.interfaces.storage.model_fs import ModelStorage
@@ -151,11 +152,7 @@ class ModelController():
151
152
  def delete_model(self, model_name: str, project_name: str = 'mindsdb', version=None):
152
153
  from mindsdb.interfaces.database.database import DatabaseController
153
154
 
154
- project_record = db.Project.query.filter(
155
- (func.lower(db.Project.name) == func.lower(project_name))
156
- & (db.Project.company_id == ctx.company_id)
157
- & (db.Project.deleted_at == null())
158
- ).first()
155
+ project_record = get_project_record(func.lower(project_name))
159
156
  if project_record is None:
160
157
  raise Exception(f"Project '{project_name}' does not exists")
161
158
 
@@ -1,4 +1,5 @@
1
1
  from typing import List
2
+ from textwrap import dedent
2
3
 
3
4
  from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
4
5
  from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
@@ -11,7 +12,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
11
12
 
12
13
  def get_tools(self, prefix='') -> List[BaseTool]:
13
14
  """Get the tools in the toolkit."""
14
- list_sql_database_tool = ListSQLDatabaseTool(name=f'sql_db_list_tables{prefix}', db=self.db)
15
+ list_sql_database_tool = ListSQLDatabaseTool(
16
+ name=f'sql_db_list_tables{prefix}',
17
+ db=self.db,
18
+ description=(
19
+ "Input is an empty string, output is a comma-separated list of tables in the database. "
20
+ "Each table name in the list may be in one of two formats: database_name.table_name or "
21
+ "database_name.schema_name.table_name."
22
+ )
23
+ )
15
24
 
16
25
  info_sql_database_tool_description = (
17
26
  "Input: A comma-separated list of tables. Output: Schema and sample rows for those tables. "
@@ -25,43 +34,43 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
25
34
  db=self.db, description=info_sql_database_tool_description
26
35
  )
27
36
 
28
- query_sql_database_tool_description = (
29
- "Input: A detailed SQL query. Output: Database result or error message. "
30
- "For errors, rewrite and retry the query. For 'Unknown column' errors, use "
31
- f"{info_sql_database_tool.name} to check table fields. "
32
- "This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases. "
33
- "Follow these instructions with utmost precision: "
34
- "1. Query Output Format: "
35
- " - Always return results in well-formatted **Markdown tables**. "
36
- " - Ensure clarity and proper structure for easy readability. "
37
- "2. Sample Data: "
38
- " - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers. "
39
- "3. Categorical Data: "
40
- " - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first. "
41
- " - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses. "
42
- "4. Result Limiting and Counting: "
43
- " - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`. "
44
- " - If the count is greater than 10, limit the query to return only 10 results initially. "
45
- " - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results. "
46
- " - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped. "
47
- "5. Date Handling: "
48
- " - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date. "
49
- " - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..` "
50
- " - Do not compare date values without casting columns to date. "
51
- " - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples: "
52
- " SELECT NOW() + INTERVAL 5 DAY; "
53
- " SELECT NOW() - INTERVAL 3 HOUR; "
54
- " SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY; "
55
- " SELECT NOW() - INTERVAL 1 YEAR; "
56
- "6. Query Best Practices: "
57
- " - Query only necessary columns, not all. "
58
- " - Use only existing column names from correct tables. "
59
- " - Use database-specific syntax for date operations. "
60
- "7. Error Handling: "
61
- " - For errors, rewrite and retry the query. "
62
- " - For 'Unknown column' errors, check table fields using info_sql_database_tool. "
63
- "Adhere to these guidelines for all queries and responses. Ask for clarification if needed."
64
- )
37
+ query_sql_database_tool_description = dedent(f"""\
38
+ Input: A detailed SQL query.
39
+ Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
40
+ This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
41
+ Follow these instructions with utmost precision:
42
+ 1. Query Output Format:
43
+ - Always return results in well-formatted **Markdown tables**.
44
+ - Ensure clarity and proper structure for easy readability.
45
+ 2. Sample Data:
46
+ - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
47
+ 3. Categorical Data:
48
+ - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first.
49
+ - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses.
50
+ 4. Result Limiting and Counting:
51
+ - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`.
52
+ - If the count is greater than 10, limit the query to return only 10 results initially.
53
+ - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
54
+ - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
55
+ 5. Date Handling:
56
+ - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
57
+ - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
58
+ - Do not compare date values without casting columns to date.
59
+ - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
60
+ SELECT NOW() + INTERVAL 5 DAY;
61
+ SELECT NOW() - INTERVAL 3 HOUR;
62
+ SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
63
+ SELECT NOW() - INTERVAL 1 YEAR;
64
+ 6. Query Best Practices:
65
+ - Always send only one query at a time.
66
+ - Query only necessary columns, not all.
67
+ - Use only existing column names from correct tables.
68
+ - Use database-specific syntax for date operations.
69
+ 7. Error Handling:
70
+ - For errors, rewrite and retry the query.
71
+ - For 'Unknown column' errors, check table fields using info_sql_database_tool.
72
+ Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
73
+ """)
65
74
 
66
75
  query_sql_database_tool = QuerySQLDataBaseTool(
67
76
  name=f'sql_db_query{prefix}',