MindsDB 25.1.5.0__py3-none-any.whl → 25.1.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- mindsdb/__about__.py,sha256=Hf4rpI4t4KpRan2XQZXlT2BYMNkK-Witu6DoRc7BrOM,444
1
+ mindsdb/__about__.py,sha256=yTKWTlVHwoFNow5QlIHB7ZMW57IFpBcKN7fnskXu75M,444
2
2
  mindsdb/__init__.py,sha256=fZopLiAYa9MzMZ0d48JgHc_LddfFKDzh7n_8icsjrVs,54
3
3
  mindsdb/__main__.py,sha256=VQ3RetGs34NhFRT9d76o5S3UpKxdr-G3c0138kz3f8Y,21435
4
4
  mindsdb/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -71,7 +71,7 @@ mindsdb/api/http/namespaces/default.py,sha256=r8PXn00Um2eyKB5e_Kj7fzk4e4LYH-JCzX
71
71
  mindsdb/api/http/namespaces/file.py,sha256=u6xYa_moAMb0UXWGkNtErGw9nk-FbloRuLHrLCANjoU,6644
72
72
  mindsdb/api/http/namespaces/handlers.py,sha256=zRWZvPOplwSAbKDKeQz93J38TsCQT89-GSlSug6Mtug,7911
73
73
  mindsdb/api/http/namespaces/jobs.py,sha256=Oif6biw5Bii1fboSbYbpkFJ7cZW9Ad1jpednWX14Xws,3186
74
- mindsdb/api/http/namespaces/knowledge_bases.py,sha256=bkuEaekMVpFiUTTcpqOJnxYphoEqkCDmNgth020Qszw,16564
74
+ mindsdb/api/http/namespaces/knowledge_bases.py,sha256=khPdoF5O0SQDtlGj5W0Q1gyPiuzoaq7DEgCYxpRPsXQ,16651
75
75
  mindsdb/api/http/namespaces/models.py,sha256=rCUFF02CQcF_QKeCQJcyAWIZzyyNXw-Jl-aX5lGnvBc,11240
76
76
  mindsdb/api/http/namespaces/projects.py,sha256=g2dv_f4MGy7xZRARRqpjghLGSxq_FjHx-fHqPBfRP-E,1407
77
77
  mindsdb/api/http/namespaces/skills.py,sha256=2eG5NtaqJSXQ_ex9Tus0sHA7oF4_SKOxPTdlpnz2tkk,5923
@@ -1401,7 +1401,7 @@ mindsdb/integrations/handlers/snowflake_handler/__about__.py,sha256=O2reZn6Jc5N1
1401
1401
  mindsdb/integrations/handlers/snowflake_handler/__init__.py,sha256=tPpKf8KwyX2DIgRy6XdrGgBjTf_H5G514XYH0fGFYsw,609
1402
1402
  mindsdb/integrations/handlers/snowflake_handler/connection_args.py,sha256=7pnJbHpbXMZwQbAS4U7LJUk8OWLLpPN2_q9IPr7wpec,1778
1403
1403
  mindsdb/integrations/handlers/snowflake_handler/icon.svg,sha256=Syi1A_eltgZH6HjPuKi8bi9Pzf8T879RfVAZnNzK0Qo,4088
1404
- mindsdb/integrations/handlers/snowflake_handler/requirements.txt,sha256=5r0GR-Pbs4w_Mxp6OmX83aVj7D_y7hagqozmHE_ijyE,63
1404
+ mindsdb/integrations/handlers/snowflake_handler/requirements.txt,sha256=RC9MdPLYC6oRsCC2k5sLfgxDTEuEPvqe8OpyjMYEECs,63
1405
1405
  mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py,sha256=0NpP-KVaxduKaAkb7yKA--WzdDTWhzyNrWW9BoxNF2o,11090
1406
1406
  mindsdb/integrations/handlers/snowflake_handler/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1407
1407
  mindsdb/integrations/handlers/snowflake_handler/tests/test_snowflake_handler.py,sha256=2_zTKNxqbvhzwVhU9JRmv5Chhh9rulGnMfj-GVIPA60,7369
@@ -1686,7 +1686,7 @@ mindsdb/integrations/libs/process_cache.py,sha256=Ad63SQKKVJiZemISb2RnWdjTsyKVeN
1686
1686
  mindsdb/integrations/libs/realtime_chat_handler.py,sha256=bJxlLKzYUb8tYShRUsecdubZ_E0kWxzExXK-v37gqYc,1171
1687
1687
  mindsdb/integrations/libs/response.py,sha256=iyadSLc5e7gY-rviaaoFNIrgIhDBJ-DZux062PxRRz8,3119
1688
1688
  mindsdb/integrations/libs/storage_handler.py,sha256=g4rcAD4TzmxWmEtS00235_NAnrdulIir4If6E4y_OUo,3512
1689
- mindsdb/integrations/libs/vectordatabase_handler.py,sha256=E5gYqD3e9rqspCNecxfYWwtM-itfX3kdkP7cUwUtrag,17448
1689
+ mindsdb/integrations/libs/vectordatabase_handler.py,sha256=-TcUIzSSX21DPOGh7zI0-nuumqaW0NgUCElXzouWXjg,17523
1690
1690
  mindsdb/integrations/libs/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1691
1691
  mindsdb/integrations/libs/llm/config.py,sha256=M14flGRcngP8n37sT8XLuJj5w-145B4IAyx3vLllogM,3548
1692
1692
  mindsdb/integrations/libs/llm/utils.py,sha256=vCiWWqCfmKElyyuka4Asd1UBhEZiH7YWn_xRAk3xies,24362
@@ -1770,7 +1770,7 @@ mindsdb/interfaces/agents/constants.py,sha256=VrtxjycDDsZ1z1kgVuz84yjfJicvDedeut
1770
1770
  mindsdb/interfaces/agents/langchain_agent.py,sha256=GswT0iPmQThJsoEIBOmWZG3K1eqwuLMuLQtCI36LQPg,25878
1771
1771
  mindsdb/interfaces/agents/langfuse_callback_handler.py,sha256=EIea9jsKgcGANPCZpdLe929bJy85SVA_bjdsyPiwp_g,4900
1772
1772
  mindsdb/interfaces/agents/mindsdb_chat_model.py,sha256=9e_LxCKrCSOZWqURHWavw-FQUK9PLJ5O18IGYSHD9us,6051
1773
- mindsdb/interfaces/agents/mindsdb_database_agent.py,sha256=85kDikJSld6fUg8DKcVad41fzfdoQRaN7hRG08hXBQ8,2184
1773
+ mindsdb/interfaces/agents/mindsdb_database_agent.py,sha256=lk7UyE7tK807GXLBDr4-b2VVFUUzDtpMx2GjVtywv3o,2459
1774
1774
  mindsdb/interfaces/agents/safe_output_parser.py,sha256=x2G27UPT42iVjjj44vGUVNPEUDSHH3nlKJwe3GZDh9A,1605
1775
1775
  mindsdb/interfaces/chatbot/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
1776
1776
  mindsdb/interfaces/chatbot/chatbot_controller.py,sha256=Ex-_CoZayYW3GAde0XozTL5s5M3rwWJqxt_c1uU09vg,14181
@@ -1794,7 +1794,7 @@ mindsdb/interfaces/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
1794
1794
  mindsdb/interfaces/jobs/jobs_controller.py,sha256=xBleXIpGLZ_Sg3j5e7BeTRV-Hp6ELMuFuQwtVZyQ72s,18247
1795
1795
  mindsdb/interfaces/jobs/scheduler.py,sha256=m_C-QiTExljq0ilpe4vQiQv56AIWsrtfcdo0krMYQes,3664
1796
1796
  mindsdb/interfaces/knowledge_base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1797
- mindsdb/interfaces/knowledge_base/controller.py,sha256=kCoqvJrBqrsGqmGXsF6kXEiJNPszrciUYOR3FOHxnPY,35657
1797
+ mindsdb/interfaces/knowledge_base/controller.py,sha256=2DhhWQmZXDtWuQnSEa-J4m-HEvr6fsvBZ5Chs8xS9QA,36304
1798
1798
  mindsdb/interfaces/knowledge_base/preprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1799
1799
  mindsdb/interfaces/knowledge_base/preprocessing/constants.py,sha256=0sLB2GOQhh3d46WNcVPF0iTmJc01CIXJoPT99XktuMo,295
1800
1800
  mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py,sha256=t0ilsEKWLAC0iJrWNPnZXY4DxRNQjvwv4CweeHR9u0g,5542
@@ -1808,13 +1808,13 @@ mindsdb/interfaces/query_context/context_controller.py,sha256=YAmdcSFEzd3aOr4nRF
1808
1808
  mindsdb/interfaces/query_context/last_query.py,sha256=LbZwvPtDYJFVBRonJr6RgGZyCbCNGcJJdhS22pW_YE0,9331
1809
1809
  mindsdb/interfaces/skills/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
1810
1810
  mindsdb/interfaces/skills/retrieval_tool.py,sha256=zuEEPky--GdKHa1rqd4VhM2wgtlGas5G72eFbooj-Hg,4480
1811
- mindsdb/interfaces/skills/skill_tool.py,sha256=rcs5i5SVuBEowjyCPqZk2etGHvGq2Gi89kmOt2uddSw,12567
1811
+ mindsdb/interfaces/skills/skill_tool.py,sha256=8YjAmQ8PM0dhO8pRfKuwO2Bf3bKizLARelztjbwNz4c,12925
1812
1812
  mindsdb/interfaces/skills/skills_controller.py,sha256=CUY0B_9DBCUX7LzeODrdBs4WDNRivGPTPHYcGtH7b-M,6146
1813
- mindsdb/interfaces/skills/sql_agent.py,sha256=2INHwWNzUQNYf3dWo9MQ55y5EyIe3TI-YpfucQ5ivXE,13523
1813
+ mindsdb/interfaces/skills/sql_agent.py,sha256=bZBrv2Ya-eRXvEkd4-BCXvXIzKMpTrERUmqEDh2_b7Y,14286
1814
1814
  mindsdb/interfaces/skills/custom/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1815
1815
  mindsdb/interfaces/skills/custom/text2sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1816
1816
  mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_tool.py,sha256=CDi2v2Ym3u-0nr8jq7wyf8CymWRFy_wziCov4Y9b3Iw,1253
1817
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py,sha256=G6H8u6EfCa5a6yOOeX0h5vLRb7SmEP_z4lRiDxwNtYA,6278
1817
+ mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py,sha256=xDMSe00nxdYe0m-rQM-awJnb6j5A2uR9Ve_Zx0HPMcc,7002
1818
1818
  mindsdb/interfaces/storage/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
1819
1819
  mindsdb/interfaces/storage/db.py,sha256=L-nXGVVkt4izM2VgORfCitLUg3xVup8nwLi7B9PyKCg,19351
1820
1820
  mindsdb/interfaces/storage/fs.py,sha256=4Nyo-h23UtZc2nz_LWyVzboC_e1jlU58aph1_en8MdE,21155
@@ -1929,8 +1929,8 @@ mindsdb/utilities/profiler/__init__.py,sha256=d4VXl80uSm1IotR-WwbBInPmLmACiK0Azx
1929
1929
  mindsdb/utilities/profiler/profiler.py,sha256=KCUtOupkbM_nCoof9MtiuhUzDGezx4a4NsBX6vGWbPA,3936
1930
1930
  mindsdb/utilities/render/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1931
1931
  mindsdb/utilities/render/sqlalchemy_render.py,sha256=XnG2IvB5tTF65EK-xV14HXrfGxyz2cQw7K6zEr9dclI,28287
1932
- MindsDB-25.1.5.0.dist-info/LICENSE,sha256=ziqdjujs6WDn-9g3t0SISjHCBc2pLRht3gnRbQoXmIs,5804
1933
- MindsDB-25.1.5.0.dist-info/METADATA,sha256=s852law0RiNoumZ_U3A_1MgIiwUsoutFz-QguUrawtU,42706
1934
- MindsDB-25.1.5.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1935
- MindsDB-25.1.5.0.dist-info/top_level.txt,sha256=10wPR96JDf3hM8aMP7Fz0lDlmClEP480zgXISJKr5jE,8
1936
- MindsDB-25.1.5.0.dist-info/RECORD,,
1932
+ MindsDB-25.1.5.2.dist-info/LICENSE,sha256=ziqdjujs6WDn-9g3t0SISjHCBc2pLRht3gnRbQoXmIs,5804
1933
+ MindsDB-25.1.5.2.dist-info/METADATA,sha256=nY4sr8A-xoqcA4-VUSC8qeEjPeDFKsEh-YFBWjBEZAk,42706
1934
+ MindsDB-25.1.5.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1935
+ MindsDB-25.1.5.2.dist-info/top_level.txt,sha256=10wPR96JDf3hM8aMP7Fz0lDlmClEP480zgXISJKr5jE,8
1936
+ MindsDB-25.1.5.2.dist-info/RECORD,,
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  __title__ = 'MindsDB'
2
2
  __package_name__ = 'mindsdb'
3
- __version__ = '25.1.5.0'
3
+ __version__ = '25.1.5.2'
4
4
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
5
5
  __email__ = "jorge@mindsdb.com"
6
6
  __author__ = 'MindsDB Inc'
@@ -105,7 +105,10 @@ class KnowledgeBasesResource(Resource):
105
105
  f'Knowledge Base with name {kb_name} already exists'
106
106
  )
107
107
 
108
- embedding_model_identifier = Identifier(parts=[knowledge_base['model']])
108
+ embedding_model_identifier = None
109
+ if knowledge_base.get('model'):
110
+ embedding_model_identifier = Identifier(parts=[knowledge_base['model']])
111
+
109
112
  storage = knowledge_base.get('storage')
110
113
  embedding_table_identifier = None
111
114
  if storage is not None:
@@ -1,2 +1,2 @@
1
- snowflake-connector-python==3.12.3
1
+ snowflake-connector-python==3.13.1
2
2
  snowflake-sqlalchemy==1.7.0
@@ -337,7 +337,7 @@ class VectorStoreHandler(BaseHandler):
337
337
  # dispatch delete
338
338
  return self.delete(table_name, conditions=conditions)
339
339
 
340
- def _dispatch_select(self, query: Select):
340
+ def dispatch_select(self, query: Select, conditions: List[FilterCondition] = None):
341
341
  """
342
342
  Dispatch select query to the appropriate method.
343
343
  """
@@ -357,7 +357,8 @@ class VectorStoreHandler(BaseHandler):
357
357
 
358
358
  # check if columns are allowed
359
359
  where_statement = query.where
360
- conditions = self._extract_conditions(where_statement)
360
+ if conditions is None:
361
+ conditions = self._extract_conditions(where_statement)
361
362
 
362
363
  # get offset and limit
363
364
  offset = query.offset.value if query.offset is not None else None
@@ -382,7 +383,7 @@ class VectorStoreHandler(BaseHandler):
382
383
  Insert: self._dispatch_insert,
383
384
  Update: self._dispatch_update,
384
385
  Delete: self._dispatch_delete,
385
- Select: self._dispatch_select,
386
+ Select: self.dispatch_select,
386
387
  }
387
388
  if type(query) in dispatch_router:
388
389
  resp = dispatch_router[type(query)](query)
@@ -11,6 +11,17 @@ from mindsdb.interfaces.skills.sql_agent import SQLAgent
11
11
  logger = log.getLogger(__name__)
12
12
 
13
13
 
14
+ def extract_essential(input: str) -> str:
15
+ """ Sometimes LLM include to input unnecessary data. We can't control stochastic nature of LLM, so we need to
16
+ 'clean' input somehow. LLM prompt contains instruction to enclose input between '$START$' and '$STOP$'.
17
+ """
18
+ if '$START$' in input:
19
+ input = input.partition('$START$')[-1]
20
+ if '$STOP$' in input:
21
+ input = input.partition('$STOP$')[0]
22
+ return input.strip(' ')
23
+
24
+
14
25
  class MindsDBSQL(SQLDatabase):
15
26
  @staticmethod
16
27
  def custom_init(
@@ -51,12 +62,9 @@ class MindsDBSQL(SQLDatabase):
51
62
 
52
63
  def get_table_info_no_throw(self, table_names: Optional[List[str]] = None) -> str:
53
64
  for i in range(len(table_names)):
54
- if '$START$' in table_names[i]:
55
- table_names[i] = table_names[i].partition('$START$')[-1]
56
- if '$END$' in table_names[i]:
57
- table_names[i] = table_names[i].partition('$END$')[0]
58
- table_names[i] = table_names[i].strip(' ')
65
+ table_names[i] = extract_essential(table_names[i])
59
66
  return self._sql_agent.get_table_info_safe(table_names)
60
67
 
61
68
  def run_no_throw(self, command: str, fetch: str = "all") -> str:
69
+ command = extract_essential(command)
62
70
  return self._sql_agent.query_safe(command)
@@ -26,6 +26,9 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
26
26
  )
27
27
  from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
28
28
  from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
29
+ from mindsdb.integrations.utilities.sql_utils import (
30
+ extract_comparison_conditions, filter_dataframe, FilterCondition, FilterOperator
31
+ )
29
32
  from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
30
33
  from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
31
34
  from mindsdb.interfaces.database.projects import ProjectController
@@ -101,18 +104,30 @@ class KnowledgeBaseTable:
101
104
  # Get response from vector db
102
105
  db_handler = self.get_vector_db()
103
106
  logger.debug(f"Using vector db handler: {type(db_handler)}")
104
- resp = db_handler.query(query)
105
107
 
106
- if resp.data_frame is not None:
107
- logger.debug(f"Query returned {len(resp.data_frame)} rows")
108
- logger.debug(f"Columns in response: {resp.data_frame.columns.tolist()}")
108
+ vector_filters, outer_filters = [], []
109
+ # update vector handlers, mark conditions as applied inside
110
+ for op, arg1, arg2 in extract_comparison_conditions(query.where):
111
+ condition = FilterCondition(arg1, FilterOperator(op.upper()), arg2)
112
+ if arg1 in (TableField.ID.value, TableField.CONTENT.value, TableField.EMBEDDINGS.value):
113
+ vector_filters.append(condition)
114
+ else:
115
+ outer_filters.append([op, arg1, arg2])
116
+
117
+ df = db_handler.dispatch_select(query, conditions=vector_filters)
118
+
119
+ if df is not None:
120
+ df = filter_dataframe(df, outer_filters)
121
+
122
+ logger.debug(f"Query returned {len(df)} rows")
123
+ logger.debug(f"Columns in response: {df.columns.tolist()}")
109
124
  # Log a sample of IDs to help diagnose issues
110
- if not resp.data_frame.empty:
111
- logger.debug(f"Sample of IDs in response: {resp.data_frame['id'].head().tolist()}")
125
+ if not df.empty:
126
+ logger.debug(f"Sample of IDs in response: {df['id'].head().tolist()}")
112
127
  else:
113
128
  logger.warning("Query returned no data")
114
129
 
115
- return resp.data_frame
130
+ return df
116
131
 
117
132
  def insert_files(self, file_names: List[str]):
118
133
  """Process and insert files"""
@@ -15,20 +15,25 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
15
15
  list_sql_database_tool = ListSQLDatabaseTool(
16
16
  name=f'sql_db_list_tables{prefix}',
17
17
  db=self.db,
18
- description=(
19
- "Input is an empty string, output is a comma-separated list of tables in the database. "
20
- "Each table name in the list may be in one of two formats: database_name.table_name or "
21
- "database_name.schema_name.table_name."
22
- "If the table name is enclosed in backticks marks, then always use the table name with backticks marks in subsequent queries."
23
- )
18
+ description=dedent("""\n
19
+ Input is an empty string, output is a comma-separated list of tables in the database. Each table name is escaped using backticks.
20
+ Each table name in the list may be in one of two formats: database_name.`table_name` or database_name.schema_name.`table_name`.
21
+ Table names in response to the user must be escaped using backticks.
22
+ """)
24
23
  )
25
24
 
26
25
  info_sql_database_tool_description = (
27
- "Input: A comma-separated list of tables enclosed between the symbols $START$ and $END$. Output: Schema and sample rows for those tables. "
28
- f"Ensure tables exist by calling {list_sql_database_tool.name} first. "
26
+ "Input: A comma-separated list of tables enclosed between the symbols $START$ and $STOP$. The tables names itself must be escaped using backticks.\n"
27
+ "Output: Schema and sample rows for those tables. \n"
29
28
  "Use this tool to investigate table schemas for needed columns. "
30
- "Get sample data with 'SELECT * FROM table LIMIT 3' before answering questions. "
31
- "Example Input: $START$ table1, table2, table3 $END$"
29
+ f"Ensure tables exist by calling {list_sql_database_tool.name} first. "
30
+ # "The names of tables, schemas, and databases must be escaped using backticks. "
31
+ # "Always enclose the names of tables, schemas, and databases in backticks. "
32
+ "Get sample data with 'SELECT * FROM `database`.`table` LIMIT 3' before answering questions. \n"
33
+ "Example of correct Input:\n $START$ `database`.`table1`, `database`.`table2`, `database`.`table3` $STOP$\n"
34
+ " $START$ `table1` `table2` `table3` $STOP$\n"
35
+ "Example of wrong Input:\n $START$ `database.table1`, `database.table2`, `database.table3` $STOP$\n"
36
+ " $START$ table1 table2 table3 $STOP$\n"
32
37
  )
33
38
  info_sql_database_tool = InfoSQLDatabaseTool(
34
39
  name=f'sql_db_schema{prefix}',
@@ -36,7 +41,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
36
41
  )
37
42
 
38
43
  query_sql_database_tool_description = dedent(f"""\
39
- Input: A detailed SQL query.
44
+ Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
40
45
  Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
41
46
  This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
42
47
  Follow these instructions with utmost precision:
@@ -64,6 +69,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
64
69
  SELECT NOW() - INTERVAL 1 YEAR;
65
70
  6. Query Best Practices:
66
71
  - Always send only one query at a time.
72
+ - Always enclose the names of tables, schemas, and databases in backticks.
67
73
  - The input SQL query must end with a semicolon.
68
74
  - Query only necessary columns, not all.
69
75
  - Use only existing column names from correct tables.
@@ -126,6 +126,10 @@ class SkillToolController:
126
126
 
127
127
  command_executor = self.get_command_executor()
128
128
 
129
+ def escape_table_name(name: str) -> str:
130
+ name = name.strip(' `')
131
+ return f'`{name}`'
132
+
129
133
  tables_list = []
130
134
  for skill in skills:
131
135
  database = skill.params['database']
@@ -137,19 +141,22 @@ class SkillToolController:
137
141
  else:
138
142
  response = handler.get_tables()
139
143
  # no restrictions
144
+ columns = [c.lower() for c in response.data_frame.columns]
145
+ name_idx = columns.index('table_name') if 'table_name' in columns else 0
146
+
140
147
  if 'table_schema' in response.data_frame.columns:
141
148
  for _, row in response.data_frame.iterrows():
142
- tables_list.append(f"{database}.{row['table_schema']}.{row['table_name']}")
149
+ tables_list.append(f"{database}.{row['table_schema']}.{escape_table_name(row[name_idx])}")
143
150
  else:
144
- for _, row in response.data_frame.iterrows():
145
- tables_list.append(f"{database}.{row['table_name']}")
151
+ for table_name in response.data_frame.iloc[:, name_idx]:
152
+ tables_list.append(f"{database}.{escape_table_name(table_name)}")
146
153
  continue
147
154
  for schema_name, tables in restriction_on_tables.items():
148
155
  for table in tables:
149
156
  if schema_name is None:
150
- tables_list.append(f'{database}.{table}')
157
+ tables_list.append(f'{database}.{escape_table_name(table)}')
151
158
  else:
152
- tables_list.append(f'{database}.{schema_name}.{table}')
159
+ tables_list.append(f'{database}.{schema_name}.{escape_table_name(table)}')
153
160
 
154
161
  sql_agent = SQLAgent(
155
162
  command_executor=command_executor,
@@ -1,7 +1,9 @@
1
1
 
2
2
  import re
3
+ import csv
3
4
  import inspect
4
- from typing import Iterable, List, Optional
5
+ from io import StringIO
6
+ from typing import Iterable, List, Optional, Any
5
7
 
6
8
  import pandas as pd
7
9
  from mindsdb_sql_parser import parse_sql
@@ -14,6 +16,22 @@ from mindsdb.integrations.utilities.query_traversal import query_traversal
14
16
  logger = log.getLogger(__name__)
15
17
 
16
18
 
19
+ def list_to_csv_str(array: List[List[Any]]) -> str:
20
+ """Convert a 2D array into a CSV string.
21
+
22
+ Args:
23
+ array (List[List[Any]]): A 2D array/list of values to convert to CSV format
24
+
25
+ Returns:
26
+ str: The array formatted as a CSV string using Excel dialect
27
+ """
28
+ output = StringIO()
29
+ writer = csv.writer(output, dialect='excel')
30
+ str_array = [[str(item) for item in row] for row in array]
31
+ writer.writerows(str_array)
32
+ return output.getvalue()
33
+
34
+
17
35
  def split_table_name(table_name: str) -> List[str]:
18
36
  """Split table name from llm to parst
19
37
 
@@ -24,40 +42,36 @@ def split_table_name(table_name: str) -> List[str]:
24
42
  List[str]: parts of table identifier like ['database', 'schema', 'table']
25
43
 
26
44
  Example:
27
- Input: 'aaa.bbb', Output: ['aaa', 'bbb']
28
- Input: '`aaa.bbb`', Output: ['aaa', 'bbb']
29
- Input: '`aaa.`bbb``', Output: ['aaa', 'bbb']
30
- Input: 'aaa.bbb.ccc', Output: ['aaa', 'bbb', 'ccc']
31
- Input: '`aaa.bbb.ccc`', Output: ['aaa', 'bbb', 'ccc']
32
- Input: '`aaa.`bbb.ccc``', Output: ['aaa', 'bbb.ccc']
33
- Input: 'aaa.`bbb.ccc`', Output: ['aaa', 'bbb.ccc']
34
- Input: 'aaa.`bbb.ccc`', Output: ['aaa', 'bbb.ccc']
35
- Input: '`` aaa.`bbb.ccc`` \n`', Output: ['aaa', 'bbb.ccc']
45
+ 'input': '`aaa`.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
46
+ 'input': '`aaa`.`bbb`.`ccc`', 'output': ['aaa', 'bbb', 'ccc']
47
+ 'input': 'aaa.bbb', 'output': ['aaa', 'bbb']
48
+ 'input': '`aaa.bbb`', 'output': ['aaa.bbb']
49
+ 'input': '`aaa.bbb.ccc`', 'output': ['aaa.bbb.ccc']
50
+ 'input': 'aaa.`bbb`', 'output': ['aaa', 'bbb']
51
+ 'input': 'aaa.bbb.ccc', 'output': ['aaa', 'bbb', 'ccc']
52
+ 'input': 'aaa.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
53
+ 'input': '`aaa`.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
36
54
  """
37
- table_name = table_name.strip(' "\'\n\r')
38
- while table_name.startswith('`') and table_name.endswith('`'):
39
- table_name = table_name[1:-1]
40
- table_name = table_name.strip(' "\'\n\r')
41
-
42
55
  result = []
43
- part = []
44
- inside_quotes = False
45
-
46
- for char in table_name:
47
- if char == '`':
48
- inside_quotes = not inside_quotes
49
- continue
50
-
51
- if char == '.' and not inside_quotes:
52
- result.append(''.join(part))
53
- part = []
56
+ current = ''
57
+ in_backticks = False
58
+
59
+ i = 0
60
+ while i < len(table_name):
61
+ if table_name[i] == '`':
62
+ in_backticks = not in_backticks
63
+ elif table_name[i] == '.' and not in_backticks:
64
+ if current:
65
+ result.append(current.strip('`'))
66
+ current = ''
54
67
  else:
55
- part.append(char)
68
+ current += table_name[i]
69
+ i += 1
56
70
 
57
- if part:
58
- result.append(''.join(part))
71
+ if current:
72
+ result.append(current.strip('`'))
59
73
 
60
- return [x for x in result if len(x) > 0]
74
+ return result
61
75
 
62
76
 
63
77
  class SQLAgent:
@@ -208,12 +222,15 @@ class SQLAgent:
208
222
 
209
223
  # Some LLMs (e.g. gpt-4o) may include backticks or quotes when invoking tools.
210
224
  table_parts = split_table_name(table_name)
225
+ if len(table_parts) == 1:
226
+ # most likely LLM enclosed all table name in backticks `database.table`
227
+ table_parts = split_table_name(table_name)
211
228
 
212
229
  # resolved table
213
230
  table_identifier = tables_idx.get(tuple(table_parts))
214
231
 
215
232
  if table_identifier is None:
216
- raise ValueError(f"Table {table} not found in database")
233
+ raise ValueError(f"Table {table} not found in the database")
217
234
  tables.append(table_identifier)
218
235
 
219
236
  return tables
@@ -262,8 +279,7 @@ class SQLAgent:
262
279
  dtypes.append(column.get('type', ''))
263
280
 
264
281
  info = f'Table named `{table_str}`:\n'
265
- info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str}:\n"
266
- info += "\t".join([field for field in fields])
282
+ info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str} in CSV format (dialect is 'excel'):\n"
267
283
  info += self._get_sample_rows(table_str, fields) + "\n"
268
284
  info += '\nColumn data types: ' + ",\t".join(
269
285
  [f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
@@ -274,9 +290,14 @@ class SQLAgent:
274
290
  try:
275
291
  ret = self._call_engine(command)
276
292
  sample_rows = ret.data.to_lists()
293
+
294
+ def truncate_value(val):
295
+ str_val = str(val)
296
+ return str_val if len(str_val) < 100 else (str_val[:100] + '...')
297
+
277
298
  sample_rows = list(
278
- map(lambda ls: [str(i) if len(str(i)) < 100 else str[:100] + '...' for i in ls], sample_rows))
279
- sample_rows_str = "\n" + "\n".join(["\t".join(row) for row in sample_rows])
299
+ map(lambda row: [truncate_value(value) for value in row], sample_rows))
300
+ sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
280
301
  except Exception as e:
281
302
  logger.warning(e)
282
303
  sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
@@ -294,9 +315,6 @@ class SQLAgent:
294
315
  If the statement returns no rows, an empty string is returned.
295
316
  """
296
317
 
297
- def _tidy(result: List) -> str:
298
- return '\n'.join(['\t'.join([str(value) for value in row]) for row in result])
299
-
300
318
  def _repr_result(ret):
301
319
  limit_rows = 30
302
320
 
@@ -312,16 +330,16 @@ class SQLAgent:
312
330
  res += f'First {limit_rows} rows:\n'
313
331
 
314
332
  else:
315
- res += 'Result:\n'
316
-
317
- res += _tidy(data[:limit_rows])
333
+ res += "Result in CSV format (dialect is 'excel'):\n"
334
+ res += list_to_csv_str([[col.name for col in ret.columns]] + data[:limit_rows])
318
335
  return res
319
336
 
320
337
  ret = self._call_engine(self._clean_query(command))
321
338
  if fetch == "all":
322
339
  result = _repr_result(ret.data)
323
340
  elif fetch == "one":
324
- result = _tidy(ret.data.to_lists()[0])
341
+ result = "Result in CSV format (dialect is 'excel'):\n"
342
+ result += list_to_csv_str([[col.name for col in ret.data.columns]] + [ret.data.to_lists()[0]])
325
343
  else:
326
344
  raise ValueError("Fetch parameter must be either 'one' or 'all'")
327
345
  return str(result)