MindsDB 25.1.5.0__py3-none-any.whl → 25.1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.5.0.dist-info → MindsDB-25.1.5.2.dist-info}/METADATA +242 -242
- {MindsDB-25.1.5.0.dist-info → MindsDB-25.1.5.2.dist-info}/RECORD +14 -14
- mindsdb/__about__.py +1 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +4 -1
- mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
- mindsdb/integrations/libs/vectordatabase_handler.py +4 -3
- mindsdb/interfaces/agents/mindsdb_database_agent.py +13 -5
- mindsdb/interfaces/knowledge_base/controller.py +22 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +17 -11
- mindsdb/interfaces/skills/skill_tool.py +12 -5
- mindsdb/interfaces/skills/sql_agent.py +60 -42
- {MindsDB-25.1.5.0.dist-info → MindsDB-25.1.5.2.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.5.0.dist-info → MindsDB-25.1.5.2.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.5.0.dist-info → MindsDB-25.1.5.2.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
mindsdb/__about__.py,sha256=
|
|
1
|
+
mindsdb/__about__.py,sha256=yTKWTlVHwoFNow5QlIHB7ZMW57IFpBcKN7fnskXu75M,444
|
|
2
2
|
mindsdb/__init__.py,sha256=fZopLiAYa9MzMZ0d48JgHc_LddfFKDzh7n_8icsjrVs,54
|
|
3
3
|
mindsdb/__main__.py,sha256=VQ3RetGs34NhFRT9d76o5S3UpKxdr-G3c0138kz3f8Y,21435
|
|
4
4
|
mindsdb/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -71,7 +71,7 @@ mindsdb/api/http/namespaces/default.py,sha256=r8PXn00Um2eyKB5e_Kj7fzk4e4LYH-JCzX
|
|
|
71
71
|
mindsdb/api/http/namespaces/file.py,sha256=u6xYa_moAMb0UXWGkNtErGw9nk-FbloRuLHrLCANjoU,6644
|
|
72
72
|
mindsdb/api/http/namespaces/handlers.py,sha256=zRWZvPOplwSAbKDKeQz93J38TsCQT89-GSlSug6Mtug,7911
|
|
73
73
|
mindsdb/api/http/namespaces/jobs.py,sha256=Oif6biw5Bii1fboSbYbpkFJ7cZW9Ad1jpednWX14Xws,3186
|
|
74
|
-
mindsdb/api/http/namespaces/knowledge_bases.py,sha256=
|
|
74
|
+
mindsdb/api/http/namespaces/knowledge_bases.py,sha256=khPdoF5O0SQDtlGj5W0Q1gyPiuzoaq7DEgCYxpRPsXQ,16651
|
|
75
75
|
mindsdb/api/http/namespaces/models.py,sha256=rCUFF02CQcF_QKeCQJcyAWIZzyyNXw-Jl-aX5lGnvBc,11240
|
|
76
76
|
mindsdb/api/http/namespaces/projects.py,sha256=g2dv_f4MGy7xZRARRqpjghLGSxq_FjHx-fHqPBfRP-E,1407
|
|
77
77
|
mindsdb/api/http/namespaces/skills.py,sha256=2eG5NtaqJSXQ_ex9Tus0sHA7oF4_SKOxPTdlpnz2tkk,5923
|
|
@@ -1401,7 +1401,7 @@ mindsdb/integrations/handlers/snowflake_handler/__about__.py,sha256=O2reZn6Jc5N1
|
|
|
1401
1401
|
mindsdb/integrations/handlers/snowflake_handler/__init__.py,sha256=tPpKf8KwyX2DIgRy6XdrGgBjTf_H5G514XYH0fGFYsw,609
|
|
1402
1402
|
mindsdb/integrations/handlers/snowflake_handler/connection_args.py,sha256=7pnJbHpbXMZwQbAS4U7LJUk8OWLLpPN2_q9IPr7wpec,1778
|
|
1403
1403
|
mindsdb/integrations/handlers/snowflake_handler/icon.svg,sha256=Syi1A_eltgZH6HjPuKi8bi9Pzf8T879RfVAZnNzK0Qo,4088
|
|
1404
|
-
mindsdb/integrations/handlers/snowflake_handler/requirements.txt,sha256=
|
|
1404
|
+
mindsdb/integrations/handlers/snowflake_handler/requirements.txt,sha256=RC9MdPLYC6oRsCC2k5sLfgxDTEuEPvqe8OpyjMYEECs,63
|
|
1405
1405
|
mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py,sha256=0NpP-KVaxduKaAkb7yKA--WzdDTWhzyNrWW9BoxNF2o,11090
|
|
1406
1406
|
mindsdb/integrations/handlers/snowflake_handler/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1407
1407
|
mindsdb/integrations/handlers/snowflake_handler/tests/test_snowflake_handler.py,sha256=2_zTKNxqbvhzwVhU9JRmv5Chhh9rulGnMfj-GVIPA60,7369
|
|
@@ -1686,7 +1686,7 @@ mindsdb/integrations/libs/process_cache.py,sha256=Ad63SQKKVJiZemISb2RnWdjTsyKVeN
|
|
|
1686
1686
|
mindsdb/integrations/libs/realtime_chat_handler.py,sha256=bJxlLKzYUb8tYShRUsecdubZ_E0kWxzExXK-v37gqYc,1171
|
|
1687
1687
|
mindsdb/integrations/libs/response.py,sha256=iyadSLc5e7gY-rviaaoFNIrgIhDBJ-DZux062PxRRz8,3119
|
|
1688
1688
|
mindsdb/integrations/libs/storage_handler.py,sha256=g4rcAD4TzmxWmEtS00235_NAnrdulIir4If6E4y_OUo,3512
|
|
1689
|
-
mindsdb/integrations/libs/vectordatabase_handler.py,sha256
|
|
1689
|
+
mindsdb/integrations/libs/vectordatabase_handler.py,sha256=-TcUIzSSX21DPOGh7zI0-nuumqaW0NgUCElXzouWXjg,17523
|
|
1690
1690
|
mindsdb/integrations/libs/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1691
1691
|
mindsdb/integrations/libs/llm/config.py,sha256=M14flGRcngP8n37sT8XLuJj5w-145B4IAyx3vLllogM,3548
|
|
1692
1692
|
mindsdb/integrations/libs/llm/utils.py,sha256=vCiWWqCfmKElyyuka4Asd1UBhEZiH7YWn_xRAk3xies,24362
|
|
@@ -1770,7 +1770,7 @@ mindsdb/interfaces/agents/constants.py,sha256=VrtxjycDDsZ1z1kgVuz84yjfJicvDedeut
|
|
|
1770
1770
|
mindsdb/interfaces/agents/langchain_agent.py,sha256=GswT0iPmQThJsoEIBOmWZG3K1eqwuLMuLQtCI36LQPg,25878
|
|
1771
1771
|
mindsdb/interfaces/agents/langfuse_callback_handler.py,sha256=EIea9jsKgcGANPCZpdLe929bJy85SVA_bjdsyPiwp_g,4900
|
|
1772
1772
|
mindsdb/interfaces/agents/mindsdb_chat_model.py,sha256=9e_LxCKrCSOZWqURHWavw-FQUK9PLJ5O18IGYSHD9us,6051
|
|
1773
|
-
mindsdb/interfaces/agents/mindsdb_database_agent.py,sha256=
|
|
1773
|
+
mindsdb/interfaces/agents/mindsdb_database_agent.py,sha256=lk7UyE7tK807GXLBDr4-b2VVFUUzDtpMx2GjVtywv3o,2459
|
|
1774
1774
|
mindsdb/interfaces/agents/safe_output_parser.py,sha256=x2G27UPT42iVjjj44vGUVNPEUDSHH3nlKJwe3GZDh9A,1605
|
|
1775
1775
|
mindsdb/interfaces/chatbot/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
1776
1776
|
mindsdb/interfaces/chatbot/chatbot_controller.py,sha256=Ex-_CoZayYW3GAde0XozTL5s5M3rwWJqxt_c1uU09vg,14181
|
|
@@ -1794,7 +1794,7 @@ mindsdb/interfaces/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
|
1794
1794
|
mindsdb/interfaces/jobs/jobs_controller.py,sha256=xBleXIpGLZ_Sg3j5e7BeTRV-Hp6ELMuFuQwtVZyQ72s,18247
|
|
1795
1795
|
mindsdb/interfaces/jobs/scheduler.py,sha256=m_C-QiTExljq0ilpe4vQiQv56AIWsrtfcdo0krMYQes,3664
|
|
1796
1796
|
mindsdb/interfaces/knowledge_base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1797
|
-
mindsdb/interfaces/knowledge_base/controller.py,sha256=
|
|
1797
|
+
mindsdb/interfaces/knowledge_base/controller.py,sha256=2DhhWQmZXDtWuQnSEa-J4m-HEvr6fsvBZ5Chs8xS9QA,36304
|
|
1798
1798
|
mindsdb/interfaces/knowledge_base/preprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1799
1799
|
mindsdb/interfaces/knowledge_base/preprocessing/constants.py,sha256=0sLB2GOQhh3d46WNcVPF0iTmJc01CIXJoPT99XktuMo,295
|
|
1800
1800
|
mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py,sha256=t0ilsEKWLAC0iJrWNPnZXY4DxRNQjvwv4CweeHR9u0g,5542
|
|
@@ -1808,13 +1808,13 @@ mindsdb/interfaces/query_context/context_controller.py,sha256=YAmdcSFEzd3aOr4nRF
|
|
|
1808
1808
|
mindsdb/interfaces/query_context/last_query.py,sha256=LbZwvPtDYJFVBRonJr6RgGZyCbCNGcJJdhS22pW_YE0,9331
|
|
1809
1809
|
mindsdb/interfaces/skills/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
1810
1810
|
mindsdb/interfaces/skills/retrieval_tool.py,sha256=zuEEPky--GdKHa1rqd4VhM2wgtlGas5G72eFbooj-Hg,4480
|
|
1811
|
-
mindsdb/interfaces/skills/skill_tool.py,sha256=
|
|
1811
|
+
mindsdb/interfaces/skills/skill_tool.py,sha256=8YjAmQ8PM0dhO8pRfKuwO2Bf3bKizLARelztjbwNz4c,12925
|
|
1812
1812
|
mindsdb/interfaces/skills/skills_controller.py,sha256=CUY0B_9DBCUX7LzeODrdBs4WDNRivGPTPHYcGtH7b-M,6146
|
|
1813
|
-
mindsdb/interfaces/skills/sql_agent.py,sha256=
|
|
1813
|
+
mindsdb/interfaces/skills/sql_agent.py,sha256=bZBrv2Ya-eRXvEkd4-BCXvXIzKMpTrERUmqEDh2_b7Y,14286
|
|
1814
1814
|
mindsdb/interfaces/skills/custom/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1815
1815
|
mindsdb/interfaces/skills/custom/text2sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1816
1816
|
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_tool.py,sha256=CDi2v2Ym3u-0nr8jq7wyf8CymWRFy_wziCov4Y9b3Iw,1253
|
|
1817
|
-
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py,sha256=
|
|
1817
|
+
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py,sha256=xDMSe00nxdYe0m-rQM-awJnb6j5A2uR9Ve_Zx0HPMcc,7002
|
|
1818
1818
|
mindsdb/interfaces/storage/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
1819
1819
|
mindsdb/interfaces/storage/db.py,sha256=L-nXGVVkt4izM2VgORfCitLUg3xVup8nwLi7B9PyKCg,19351
|
|
1820
1820
|
mindsdb/interfaces/storage/fs.py,sha256=4Nyo-h23UtZc2nz_LWyVzboC_e1jlU58aph1_en8MdE,21155
|
|
@@ -1929,8 +1929,8 @@ mindsdb/utilities/profiler/__init__.py,sha256=d4VXl80uSm1IotR-WwbBInPmLmACiK0Azx
|
|
|
1929
1929
|
mindsdb/utilities/profiler/profiler.py,sha256=KCUtOupkbM_nCoof9MtiuhUzDGezx4a4NsBX6vGWbPA,3936
|
|
1930
1930
|
mindsdb/utilities/render/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1931
1931
|
mindsdb/utilities/render/sqlalchemy_render.py,sha256=XnG2IvB5tTF65EK-xV14HXrfGxyz2cQw7K6zEr9dclI,28287
|
|
1932
|
-
MindsDB-25.1.5.0.dist-info/LICENSE,sha256=ziqdjujs6WDn-9g3t0SISjHCBc2pLRht3gnRbQoXmIs,5804
|
|
1933
|
-
MindsDB-25.1.5.
|
|
1934
|
-
MindsDB-25.1.5.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1935
|
-
MindsDB-25.1.5.0.dist-info/top_level.txt,sha256=10wPR96JDf3hM8aMP7Fz0lDlmClEP480zgXISJKr5jE,8
|
|
1936
|
-
MindsDB-25.1.5.0.dist-info/RECORD,,
|
|
1932
|
+
MindsDB-25.1.5.2.dist-info/LICENSE,sha256=ziqdjujs6WDn-9g3t0SISjHCBc2pLRht3gnRbQoXmIs,5804
|
|
1933
|
+
MindsDB-25.1.5.2.dist-info/METADATA,sha256=nY4sr8A-xoqcA4-VUSC8qeEjPeDFKsEh-YFBWjBEZAk,42706
|
|
1934
|
+
MindsDB-25.1.5.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1935
|
+
MindsDB-25.1.5.2.dist-info/top_level.txt,sha256=10wPR96JDf3hM8aMP7Fz0lDlmClEP480zgXISJKr5jE,8
|
|
1936
|
+
MindsDB-25.1.5.2.dist-info/RECORD,,
|
mindsdb/__about__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
__title__ = 'MindsDB'
|
|
2
2
|
__package_name__ = 'mindsdb'
|
|
3
|
-
__version__ = '25.1.5.0'
|
|
3
|
+
__version__ = '25.1.5.2'
|
|
4
4
|
__description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
|
|
5
5
|
__email__ = "jorge@mindsdb.com"
|
|
6
6
|
__author__ = 'MindsDB Inc'
|
|
@@ -105,7 +105,10 @@ class KnowledgeBasesResource(Resource):
|
|
|
105
105
|
f'Knowledge Base with name {kb_name} already exists'
|
|
106
106
|
)
|
|
107
107
|
|
|
108
|
-
embedding_model_identifier =
|
|
108
|
+
embedding_model_identifier = None
|
|
109
|
+
if knowledge_base.get('model'):
|
|
110
|
+
embedding_model_identifier = Identifier(parts=[knowledge_base['model']])
|
|
111
|
+
|
|
109
112
|
storage = knowledge_base.get('storage')
|
|
110
113
|
embedding_table_identifier = None
|
|
111
114
|
if storage is not None:
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
snowflake-connector-python==3.
|
|
1
|
+
snowflake-connector-python==3.13.1
|
|
2
2
|
snowflake-sqlalchemy==1.7.0
|
|
@@ -337,7 +337,7 @@ class VectorStoreHandler(BaseHandler):
|
|
|
337
337
|
# dispatch delete
|
|
338
338
|
return self.delete(table_name, conditions=conditions)
|
|
339
339
|
|
|
340
|
-
def _dispatch_select(self, query: Select):
|
|
340
|
+
def dispatch_select(self, query: Select, conditions: List[FilterCondition] = None):
|
|
341
341
|
"""
|
|
342
342
|
Dispatch select query to the appropriate method.
|
|
343
343
|
"""
|
|
@@ -357,7 +357,8 @@ class VectorStoreHandler(BaseHandler):
|
|
|
357
357
|
|
|
358
358
|
# check if columns are allowed
|
|
359
359
|
where_statement = query.where
|
|
360
|
-
conditions = self._extract_conditions(where_statement)
|
|
360
|
+
if conditions is None:
|
|
361
|
+
conditions = self._extract_conditions(where_statement)
|
|
361
362
|
|
|
362
363
|
# get offset and limit
|
|
363
364
|
offset = query.offset.value if query.offset is not None else None
|
|
@@ -382,7 +383,7 @@ class VectorStoreHandler(BaseHandler):
|
|
|
382
383
|
Insert: self._dispatch_insert,
|
|
383
384
|
Update: self._dispatch_update,
|
|
384
385
|
Delete: self._dispatch_delete,
|
|
385
|
-
Select: self._dispatch_select,
|
|
386
|
+
Select: self.dispatch_select,
|
|
386
387
|
}
|
|
387
388
|
if type(query) in dispatch_router:
|
|
388
389
|
resp = dispatch_router[type(query)](query)
|
|
@@ -11,6 +11,17 @@ from mindsdb.interfaces.skills.sql_agent import SQLAgent
|
|
|
11
11
|
logger = log.getLogger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
def extract_essential(input: str) -> str:
|
|
15
|
+
""" Sometimes LLM include to input unnecessary data. We can't control stochastic nature of LLM, so we need to
|
|
16
|
+
'clean' input somehow. LLM prompt contains instruction to enclose input between '$START$' and '$STOP$'.
|
|
17
|
+
"""
|
|
18
|
+
if '$START$' in input:
|
|
19
|
+
input = input.partition('$START$')[-1]
|
|
20
|
+
if '$STOP$' in input:
|
|
21
|
+
input = input.partition('$STOP$')[0]
|
|
22
|
+
return input.strip(' ')
|
|
23
|
+
|
|
24
|
+
|
|
14
25
|
class MindsDBSQL(SQLDatabase):
|
|
15
26
|
@staticmethod
|
|
16
27
|
def custom_init(
|
|
@@ -51,12 +62,9 @@ class MindsDBSQL(SQLDatabase):
|
|
|
51
62
|
|
|
52
63
|
def get_table_info_no_throw(self, table_names: Optional[List[str]] = None) -> str:
|
|
53
64
|
for i in range(len(table_names)):
|
|
54
|
-
|
|
55
|
-
table_names[i] = table_names[i].partition('$START$')[-1]
|
|
56
|
-
if '$END$' in table_names[i]:
|
|
57
|
-
table_names[i] = table_names[i].partition('$END$')[0]
|
|
58
|
-
table_names[i] = table_names[i].strip(' ')
|
|
65
|
+
table_names[i] = extract_essential(table_names[i])
|
|
59
66
|
return self._sql_agent.get_table_info_safe(table_names)
|
|
60
67
|
|
|
61
68
|
def run_no_throw(self, command: str, fetch: str = "all") -> str:
|
|
69
|
+
command = extract_essential(command)
|
|
62
70
|
return self._sql_agent.query_safe(command)
|
|
@@ -26,6 +26,9 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
|
|
|
26
26
|
)
|
|
27
27
|
from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
|
|
28
28
|
from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
|
|
29
|
+
from mindsdb.integrations.utilities.sql_utils import (
|
|
30
|
+
extract_comparison_conditions, filter_dataframe, FilterCondition, FilterOperator
|
|
31
|
+
)
|
|
29
32
|
from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
|
|
30
33
|
from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
|
|
31
34
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
@@ -101,18 +104,30 @@ class KnowledgeBaseTable:
|
|
|
101
104
|
# Get response from vector db
|
|
102
105
|
db_handler = self.get_vector_db()
|
|
103
106
|
logger.debug(f"Using vector db handler: {type(db_handler)}")
|
|
104
|
-
resp = db_handler.query(query)
|
|
105
107
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
108
|
+
vector_filters, outer_filters = [], []
|
|
109
|
+
# update vector handlers, mark conditions as applied inside
|
|
110
|
+
for op, arg1, arg2 in extract_comparison_conditions(query.where):
|
|
111
|
+
condition = FilterCondition(arg1, FilterOperator(op.upper()), arg2)
|
|
112
|
+
if arg1 in (TableField.ID.value, TableField.CONTENT.value, TableField.EMBEDDINGS.value):
|
|
113
|
+
vector_filters.append(condition)
|
|
114
|
+
else:
|
|
115
|
+
outer_filters.append([op, arg1, arg2])
|
|
116
|
+
|
|
117
|
+
df = db_handler.dispatch_select(query, conditions=vector_filters)
|
|
118
|
+
|
|
119
|
+
if df is not None:
|
|
120
|
+
df = filter_dataframe(df, outer_filters)
|
|
121
|
+
|
|
122
|
+
logger.debug(f"Query returned {len(df)} rows")
|
|
123
|
+
logger.debug(f"Columns in response: {df.columns.tolist()}")
|
|
109
124
|
# Log a sample of IDs to help diagnose issues
|
|
110
|
-
if not
|
|
111
|
-
logger.debug(f"Sample of IDs in response: {
|
|
125
|
+
if not df.empty:
|
|
126
|
+
logger.debug(f"Sample of IDs in response: {df['id'].head().tolist()}")
|
|
112
127
|
else:
|
|
113
128
|
logger.warning("Query returned no data")
|
|
114
129
|
|
|
115
|
-
return
|
|
130
|
+
return df
|
|
116
131
|
|
|
117
132
|
def insert_files(self, file_names: List[str]):
|
|
118
133
|
"""Process and insert files"""
|
|
@@ -15,20 +15,25 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
15
15
|
list_sql_database_tool = ListSQLDatabaseTool(
|
|
16
16
|
name=f'sql_db_list_tables{prefix}',
|
|
17
17
|
db=self.db,
|
|
18
|
-
description=(
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
)
|
|
18
|
+
description=dedent("""\n
|
|
19
|
+
Input is an empty string, output is a comma-separated list of tables in the database. Each table name is escaped using backticks.
|
|
20
|
+
Each table name in the list may be in one of two formats: database_name.`table_name` or database_name.schema_name.`table_name`.
|
|
21
|
+
Table names in response to the user must be escaped using backticks.
|
|
22
|
+
""")
|
|
24
23
|
)
|
|
25
24
|
|
|
26
25
|
info_sql_database_tool_description = (
|
|
27
|
-
"Input: A comma-separated list of tables enclosed between the symbols $START$ and $
|
|
28
|
-
|
|
26
|
+
"Input: A comma-separated list of tables enclosed between the symbols $START$ and $STOP$. The tables names itself must be escaped using backticks.\n"
|
|
27
|
+
"Output: Schema and sample rows for those tables. \n"
|
|
29
28
|
"Use this tool to investigate table schemas for needed columns. "
|
|
30
|
-
"
|
|
31
|
-
"
|
|
29
|
+
f"Ensure tables exist by calling {list_sql_database_tool.name} first. "
|
|
30
|
+
# "The names of tables, schemas, and databases must be escaped using backticks. "
|
|
31
|
+
# "Always enclose the names of tables, schemas, and databases in backticks. "
|
|
32
|
+
"Get sample data with 'SELECT * FROM `database`.`table` LIMIT 3' before answering questions. \n"
|
|
33
|
+
"Example of correct Input:\n $START$ `database`.`table1`, `database`.`table2`, `database`.`table3` $STOP$\n"
|
|
34
|
+
" $START$ `table1` `table2` `table3` $STOP$\n"
|
|
35
|
+
"Example of wrong Input:\n $START$ `database.table1`, `database.table2`, `database.table3` $STOP$\n"
|
|
36
|
+
" $START$ table1 table2 table3 $STOP$\n"
|
|
32
37
|
)
|
|
33
38
|
info_sql_database_tool = InfoSQLDatabaseTool(
|
|
34
39
|
name=f'sql_db_schema{prefix}',
|
|
@@ -36,7 +41,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
36
41
|
)
|
|
37
42
|
|
|
38
43
|
query_sql_database_tool_description = dedent(f"""\
|
|
39
|
-
Input: A detailed SQL query.
|
|
44
|
+
Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
|
|
40
45
|
Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
|
|
41
46
|
This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
|
|
42
47
|
Follow these instructions with utmost precision:
|
|
@@ -64,6 +69,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
64
69
|
SELECT NOW() - INTERVAL 1 YEAR;
|
|
65
70
|
6. Query Best Practices:
|
|
66
71
|
- Always send only one query at a time.
|
|
72
|
+
- Always enclose the names of tables, schemas, and databases in backticks.
|
|
67
73
|
- The input SQL query must end with a semicolon.
|
|
68
74
|
- Query only necessary columns, not all.
|
|
69
75
|
- Use only existing column names from correct tables.
|
|
@@ -126,6 +126,10 @@ class SkillToolController:
|
|
|
126
126
|
|
|
127
127
|
command_executor = self.get_command_executor()
|
|
128
128
|
|
|
129
|
+
def escape_table_name(name: str) -> str:
|
|
130
|
+
name = name.strip(' `')
|
|
131
|
+
return f'`{name}`'
|
|
132
|
+
|
|
129
133
|
tables_list = []
|
|
130
134
|
for skill in skills:
|
|
131
135
|
database = skill.params['database']
|
|
@@ -137,19 +141,22 @@ class SkillToolController:
|
|
|
137
141
|
else:
|
|
138
142
|
response = handler.get_tables()
|
|
139
143
|
# no restrictions
|
|
144
|
+
columns = [c.lower() for c in response.data_frame.columns]
|
|
145
|
+
name_idx = columns.index('table_name') if 'table_name' in columns else 0
|
|
146
|
+
|
|
140
147
|
if 'table_schema' in response.data_frame.columns:
|
|
141
148
|
for _, row in response.data_frame.iterrows():
|
|
142
|
-
tables_list.append(f"{database}.{row['table_schema']}.{row[
|
|
149
|
+
tables_list.append(f"{database}.{row['table_schema']}.{escape_table_name(row[name_idx])}")
|
|
143
150
|
else:
|
|
144
|
-
for
|
|
145
|
-
tables_list.append(f"{database}.{
|
|
151
|
+
for table_name in response.data_frame.iloc[:, name_idx]:
|
|
152
|
+
tables_list.append(f"{database}.{escape_table_name(table_name)}")
|
|
146
153
|
continue
|
|
147
154
|
for schema_name, tables in restriction_on_tables.items():
|
|
148
155
|
for table in tables:
|
|
149
156
|
if schema_name is None:
|
|
150
|
-
tables_list.append(f'{database}.{table}')
|
|
157
|
+
tables_list.append(f'{database}.{escape_table_name(table)}')
|
|
151
158
|
else:
|
|
152
|
-
tables_list.append(f'{database}.{schema_name}.{table}')
|
|
159
|
+
tables_list.append(f'{database}.{schema_name}.{escape_table_name(table)}')
|
|
153
160
|
|
|
154
161
|
sql_agent = SQLAgent(
|
|
155
162
|
command_executor=command_executor,
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
import re
|
|
3
|
+
import csv
|
|
3
4
|
import inspect
|
|
4
|
-
from
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from typing import Iterable, List, Optional, Any
|
|
5
7
|
|
|
6
8
|
import pandas as pd
|
|
7
9
|
from mindsdb_sql_parser import parse_sql
|
|
@@ -14,6 +16,22 @@ from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
|
14
16
|
logger = log.getLogger(__name__)
|
|
15
17
|
|
|
16
18
|
|
|
19
|
+
def list_to_csv_str(array: List[List[Any]]) -> str:
|
|
20
|
+
"""Convert a 2D array into a CSV string.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
array (List[List[Any]]): A 2D array/list of values to convert to CSV format
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
str: The array formatted as a CSV string using Excel dialect
|
|
27
|
+
"""
|
|
28
|
+
output = StringIO()
|
|
29
|
+
writer = csv.writer(output, dialect='excel')
|
|
30
|
+
str_array = [[str(item) for item in row] for row in array]
|
|
31
|
+
writer.writerows(str_array)
|
|
32
|
+
return output.getvalue()
|
|
33
|
+
|
|
34
|
+
|
|
17
35
|
def split_table_name(table_name: str) -> List[str]:
|
|
18
36
|
"""Split table name from llm to parst
|
|
19
37
|
|
|
@@ -24,40 +42,36 @@ def split_table_name(table_name: str) -> List[str]:
|
|
|
24
42
|
List[str]: parts of table identifier like ['database', 'schema', 'table']
|
|
25
43
|
|
|
26
44
|
Example:
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
45
|
+
'input': '`aaa`.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
|
|
46
|
+
'input': '`aaa`.`bbb`.`ccc`', 'output': ['aaa', 'bbb', 'ccc']
|
|
47
|
+
'input': 'aaa.bbb', 'output': ['aaa', 'bbb']
|
|
48
|
+
'input': '`aaa.bbb`', 'output': ['aaa.bbb']
|
|
49
|
+
'input': '`aaa.bbb.ccc`', 'output': ['aaa.bbb.ccc']
|
|
50
|
+
'input': 'aaa.`bbb`', 'output': ['aaa', 'bbb']
|
|
51
|
+
'input': 'aaa.bbb.ccc', 'output': ['aaa', 'bbb', 'ccc']
|
|
52
|
+
'input': 'aaa.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
|
|
53
|
+
'input': '`aaa`.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
|
|
36
54
|
"""
|
|
37
|
-
table_name = table_name.strip(' "\'\n\r')
|
|
38
|
-
while table_name.startswith('`') and table_name.endswith('`'):
|
|
39
|
-
table_name = table_name[1:-1]
|
|
40
|
-
table_name = table_name.strip(' "\'\n\r')
|
|
41
|
-
|
|
42
55
|
result = []
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
56
|
+
current = ''
|
|
57
|
+
in_backticks = False
|
|
58
|
+
|
|
59
|
+
i = 0
|
|
60
|
+
while i < len(table_name):
|
|
61
|
+
if table_name[i] == '`':
|
|
62
|
+
in_backticks = not in_backticks
|
|
63
|
+
elif table_name[i] == '.' and not in_backticks:
|
|
64
|
+
if current:
|
|
65
|
+
result.append(current.strip('`'))
|
|
66
|
+
current = ''
|
|
54
67
|
else:
|
|
55
|
-
|
|
68
|
+
current += table_name[i]
|
|
69
|
+
i += 1
|
|
56
70
|
|
|
57
|
-
if
|
|
58
|
-
result.append(''
|
|
71
|
+
if current:
|
|
72
|
+
result.append(current.strip('`'))
|
|
59
73
|
|
|
60
|
-
return
|
|
74
|
+
return result
|
|
61
75
|
|
|
62
76
|
|
|
63
77
|
class SQLAgent:
|
|
@@ -208,12 +222,15 @@ class SQLAgent:
|
|
|
208
222
|
|
|
209
223
|
# Some LLMs (e.g. gpt-4o) may include backticks or quotes when invoking tools.
|
|
210
224
|
table_parts = split_table_name(table_name)
|
|
225
|
+
if len(table_parts) == 1:
|
|
226
|
+
# most likely LLM enclosed all table name in backticks `database.table`
|
|
227
|
+
table_parts = split_table_name(table_name)
|
|
211
228
|
|
|
212
229
|
# resolved table
|
|
213
230
|
table_identifier = tables_idx.get(tuple(table_parts))
|
|
214
231
|
|
|
215
232
|
if table_identifier is None:
|
|
216
|
-
raise ValueError(f"Table {table} not found in database")
|
|
233
|
+
raise ValueError(f"Table {table} not found in the database")
|
|
217
234
|
tables.append(table_identifier)
|
|
218
235
|
|
|
219
236
|
return tables
|
|
@@ -262,8 +279,7 @@ class SQLAgent:
|
|
|
262
279
|
dtypes.append(column.get('type', ''))
|
|
263
280
|
|
|
264
281
|
info = f'Table named `{table_str}`:\n'
|
|
265
|
-
info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str}:\n"
|
|
266
|
-
info += "\t".join([field for field in fields])
|
|
282
|
+
info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str} in CSV format (dialect is 'excel'):\n"
|
|
267
283
|
info += self._get_sample_rows(table_str, fields) + "\n"
|
|
268
284
|
info += '\nColumn data types: ' + ",\t".join(
|
|
269
285
|
[f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
|
|
@@ -274,9 +290,14 @@ class SQLAgent:
|
|
|
274
290
|
try:
|
|
275
291
|
ret = self._call_engine(command)
|
|
276
292
|
sample_rows = ret.data.to_lists()
|
|
293
|
+
|
|
294
|
+
def truncate_value(val):
|
|
295
|
+
str_val = str(val)
|
|
296
|
+
return str_val if len(str_val) < 100 else (str_val[:100] + '...')
|
|
297
|
+
|
|
277
298
|
sample_rows = list(
|
|
278
|
-
map(lambda
|
|
279
|
-
sample_rows_str = "\n" +
|
|
299
|
+
map(lambda row: [truncate_value(value) for value in row], sample_rows))
|
|
300
|
+
sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
|
|
280
301
|
except Exception as e:
|
|
281
302
|
logger.warning(e)
|
|
282
303
|
sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
|
|
@@ -294,9 +315,6 @@ class SQLAgent:
|
|
|
294
315
|
If the statement returns no rows, an empty string is returned.
|
|
295
316
|
"""
|
|
296
317
|
|
|
297
|
-
def _tidy(result: List) -> str:
|
|
298
|
-
return '\n'.join(['\t'.join([str(value) for value in row]) for row in result])
|
|
299
|
-
|
|
300
318
|
def _repr_result(ret):
|
|
301
319
|
limit_rows = 30
|
|
302
320
|
|
|
@@ -312,16 +330,16 @@ class SQLAgent:
|
|
|
312
330
|
res += f'First {limit_rows} rows:\n'
|
|
313
331
|
|
|
314
332
|
else:
|
|
315
|
-
res += '
|
|
316
|
-
|
|
317
|
-
res += _tidy(data[:limit_rows])
|
|
333
|
+
res += "Result in CSV format (dialect is 'excel'):\n"
|
|
334
|
+
res += list_to_csv_str([[col.name for col in ret.columns]] + data[:limit_rows])
|
|
318
335
|
return res
|
|
319
336
|
|
|
320
337
|
ret = self._call_engine(self._clean_query(command))
|
|
321
338
|
if fetch == "all":
|
|
322
339
|
result = _repr_result(ret.data)
|
|
323
340
|
elif fetch == "one":
|
|
324
|
-
result =
|
|
341
|
+
result = "Result in CSV format (dialect is 'excel'):\n"
|
|
342
|
+
result += list_to_csv_str([[col.name for col in ret.data.columns]] + [ret.data.to_lists()[0]])
|
|
325
343
|
else:
|
|
326
344
|
raise ValueError("Fetch parameter must be either 'one' or 'all'")
|
|
327
345
|
return str(result)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|