MindsDB: mindsdb-25.9.2.0a1-py3-none-any.whl → mindsdb-25.10.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. More details are linked from the original diff page.

Files changed (163):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +40 -29
  3. mindsdb/api/a2a/__init__.py +1 -1
  4. mindsdb/api/a2a/agent.py +16 -10
  5. mindsdb/api/a2a/common/server/server.py +7 -3
  6. mindsdb/api/a2a/common/server/task_manager.py +12 -5
  7. mindsdb/api/a2a/common/types.py +66 -0
  8. mindsdb/api/a2a/task_manager.py +65 -17
  9. mindsdb/api/common/middleware.py +10 -12
  10. mindsdb/api/executor/command_executor.py +51 -40
  11. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  12. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
  13. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
  14. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  15. mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
  16. mindsdb/api/executor/exceptions.py +29 -10
  17. mindsdb/api/executor/planner/plan_join.py +17 -3
  18. mindsdb/api/executor/planner/query_prepare.py +2 -20
  19. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  20. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  21. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  22. mindsdb/api/executor/utilities/functions.py +6 -6
  23. mindsdb/api/executor/utilities/sql.py +37 -20
  24. mindsdb/api/http/gui.py +5 -11
  25. mindsdb/api/http/initialize.py +75 -61
  26. mindsdb/api/http/namespaces/agents.py +10 -15
  27. mindsdb/api/http/namespaces/analysis.py +13 -20
  28. mindsdb/api/http/namespaces/auth.py +1 -1
  29. mindsdb/api/http/namespaces/chatbots.py +0 -5
  30. mindsdb/api/http/namespaces/config.py +15 -11
  31. mindsdb/api/http/namespaces/databases.py +140 -201
  32. mindsdb/api/http/namespaces/file.py +17 -4
  33. mindsdb/api/http/namespaces/handlers.py +17 -7
  34. mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
  35. mindsdb/api/http/namespaces/models.py +94 -126
  36. mindsdb/api/http/namespaces/projects.py +13 -22
  37. mindsdb/api/http/namespaces/sql.py +33 -25
  38. mindsdb/api/http/namespaces/tab.py +27 -37
  39. mindsdb/api/http/namespaces/views.py +1 -1
  40. mindsdb/api/http/start.py +16 -10
  41. mindsdb/api/mcp/__init__.py +2 -1
  42. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  43. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  44. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  45. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
  46. mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
  47. mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
  48. mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
  49. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  50. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
  51. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  52. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
  53. mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
  54. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
  55. mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
  56. mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
  57. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
  58. mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
  59. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
  60. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  61. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  62. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  63. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  64. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  65. mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
  66. mindsdb/integrations/libs/api_handler.py +10 -10
  67. mindsdb/integrations/libs/base.py +4 -4
  68. mindsdb/integrations/libs/llm/utils.py +2 -2
  69. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  70. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  71. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  72. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  73. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  74. mindsdb/integrations/libs/process_cache.py +132 -140
  75. mindsdb/integrations/libs/response.py +18 -12
  76. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  77. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  78. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
  79. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
  80. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  81. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
  82. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  83. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  84. mindsdb/integrations/utilities/rag/settings.py +58 -133
  85. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  86. mindsdb/interfaces/agents/agents_controller.py +2 -3
  87. mindsdb/interfaces/agents/constants.py +0 -2
  88. mindsdb/interfaces/agents/litellm_server.py +34 -58
  89. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  90. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  91. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  92. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  93. mindsdb/interfaces/chatbot/polling.py +30 -18
  94. mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
  95. mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
  96. mindsdb/interfaces/database/data_handlers_cache.py +190 -0
  97. mindsdb/interfaces/database/database.py +3 -3
  98. mindsdb/interfaces/database/integrations.py +7 -110
  99. mindsdb/interfaces/database/projects.py +2 -6
  100. mindsdb/interfaces/database/views.py +1 -4
  101. mindsdb/interfaces/file/file_controller.py +6 -6
  102. mindsdb/interfaces/functions/controller.py +1 -1
  103. mindsdb/interfaces/functions/to_markdown.py +2 -2
  104. mindsdb/interfaces/jobs/jobs_controller.py +5 -9
  105. mindsdb/interfaces/jobs/scheduler.py +3 -9
  106. mindsdb/interfaces/knowledge_base/controller.py +244 -128
  107. mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
  108. mindsdb/interfaces/knowledge_base/executor.py +11 -0
  109. mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
  110. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  111. mindsdb/interfaces/model/model_controller.py +172 -168
  112. mindsdb/interfaces/query_context/context_controller.py +14 -2
  113. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
  114. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  115. mindsdb/interfaces/skills/skill_tool.py +2 -2
  116. mindsdb/interfaces/skills/skills_controller.py +1 -4
  117. mindsdb/interfaces/skills/sql_agent.py +25 -19
  118. mindsdb/interfaces/storage/db.py +16 -6
  119. mindsdb/interfaces/storage/fs.py +114 -169
  120. mindsdb/interfaces/storage/json.py +19 -18
  121. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  122. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  123. mindsdb/interfaces/tasks/task_thread.py +7 -9
  124. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  125. mindsdb/interfaces/triggers/triggers_controller.py +47 -52
  126. mindsdb/migrations/migrate.py +16 -16
  127. mindsdb/utilities/api_status.py +58 -0
  128. mindsdb/utilities/config.py +68 -2
  129. mindsdb/utilities/exception.py +40 -1
  130. mindsdb/utilities/fs.py +0 -1
  131. mindsdb/utilities/hooks/profiling.py +17 -14
  132. mindsdb/utilities/json_encoder.py +24 -10
  133. mindsdb/utilities/langfuse.py +40 -45
  134. mindsdb/utilities/log.py +272 -0
  135. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  136. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  137. mindsdb/utilities/render/sqlalchemy_render.py +22 -20
  138. mindsdb/utilities/starters.py +0 -10
  139. mindsdb/utilities/utils.py +2 -2
  140. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +293 -276
  141. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +144 -158
  142. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  143. mindsdb/api/postgres/__init__.py +0 -0
  144. mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
  145. mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
  146. mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
  147. mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
  148. mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
  149. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
  150. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
  151. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
  152. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
  153. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
  154. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
  155. mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
  156. mindsdb/api/postgres/start.py +0 -11
  157. mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
  158. mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
  159. mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
  160. mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
  161. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
  162. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  163. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0
@@ -55,7 +55,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
55
55
  f"""\
56
56
  Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
57
57
  Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
58
- This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
58
+ This system is a highly intelligent and reliable SQL skill designed to work with databases.
59
59
  Follow these instructions with utmost precision:
60
60
  1. Final Response Format:
61
61
  - Assume the frontend fully supports Markdown unless the user specifies otherwise.
@@ -73,7 +73,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
73
73
  - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
74
74
  5. Date Handling:
75
75
  - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
76
- - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
76
+ - **Always** use `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
77
77
  - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
78
78
  - Do not compare date values without casting columns to date.
79
79
  - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
@@ -95,6 +95,8 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
95
95
  8. Identity and Purpose:
96
96
  - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
97
97
  - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
98
+ 9. Important: you can use only mysql quoting rules to compose queries: backticks (`) for identifiers, and single quotes (') for constants
99
+
98
100
  Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
99
101
  """
100
102
  )
@@ -110,7 +112,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
110
112
  "If the query is correct, it will be parsed and returned. "
111
113
  f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
112
114
  )
113
- mindsdb_sql_parser_tool = MindsDBSQLParserTool(
115
+ mindsdb_sql_parser_tool = MindsDBSQLParserTool( # noqa: F841
114
116
  name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
115
117
  )
116
118
 
@@ -118,7 +120,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
118
120
  query_sql_database_tool,
119
121
  info_sql_database_tool,
120
122
  list_sql_database_tool,
121
- mindsdb_sql_parser_tool,
123
+ # mindsdb_sql_parser_tool,
122
124
  ]
123
125
  if not self.include_knowledge_base_tools:
124
126
  return sql_tools
@@ -175,29 +177,23 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
175
177
 
176
178
  Query Types and Examples:
177
179
  1. Basic semantic search:
178
- kb_query_tool("SELECT * FROM kb_name WHERE content = 'your search query';")
180
+ kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'your search query';")
179
181
 
180
182
  2. Metadata filtering:
181
183
  kb_query_tool("SELECT * FROM kb_name WHERE metadata_field = 'value';")
182
184
 
183
185
  3. Combined search:
184
- kb_query_tool("SELECT * FROM kb_name WHERE content = 'query' AND metadata_field = 'value';")
186
+ kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND metadata_field = 'value';")
185
187
 
186
188
  4. Setting relevance threshold:
187
- kb_query_tool("SELECT * FROM kb_name WHERE content = 'query' AND relevance_threshold = 0.7;")
189
+ kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND relevance_threshold = 0.7;")
188
190
 
189
191
  5. Limiting results:
190
- kb_query_tool("SELECT * FROM kb_name WHERE content = 'query' LIMIT 5;")
192
+ kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' LIMIT 5;")
191
193
 
192
194
  6. Getting sample data:
193
195
  kb_query_tool("SELECT * FROM kb_name LIMIT 3;")
194
196
 
195
- 7. Don't use LIKE operator on content filter ie semantic search:
196
- SELECT * FROM `test_kb` WHERE content LIKE '%population of New York%' $STOP$
197
-
198
- Like is not supported, use the following instead:
199
- SELECT * FROM `test_kb` WHERE content = 'population of New York'
200
-
201
197
  Result Format:
202
198
  - Results include: id, chunk_id, chunk_content, metadata, distance, and relevance columns
203
199
  - The metadata column contains a JSON object with all metadata fields
@@ -1,32 +1,32 @@
1
- import traceback
1
+ from langchain_core.documents import Document
2
+ from langchain_core.tools import Tool
2
3
 
3
4
  from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
4
5
  from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
5
6
  from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
6
7
  from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
7
-
8
+ from mindsdb.integrations.libs.response import RESPONSE_TYPE
9
+ from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
10
+ construct_model_from_args,
11
+ )
8
12
  from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
9
13
  from mindsdb.interfaces.skills.skill_tool import skill_tool
10
14
  from mindsdb.interfaces.storage import db
11
15
  from mindsdb.interfaces.storage.db import KnowledgeBase
12
16
  from mindsdb.utilities import log
13
- from langchain_core.documents import Document
14
- from langchain_core.tools import Tool
15
- from mindsdb.integrations.libs.response import RESPONSE_TYPE
16
- from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
17
17
 
18
18
  logger = log.getLogger(__name__)
19
19
 
20
20
 
21
21
  def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
22
- tools_config = tool['config']
22
+ tools_config = tool["config"]
23
23
  tools_config.update(pred_args)
24
24
 
25
25
  kb_params = {}
26
26
  embeddings_model = None
27
27
 
28
- if 'source' in tool:
29
- kb_name = tool['source']
28
+ if "source" in tool:
29
+ kb_name = tool["source"]
30
30
  executor = skill_tool.get_command_executor()
31
31
  kb = _get_knowledge_base(kb_name, skill.project_id, executor)
32
32
 
@@ -34,30 +34,26 @@ def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipeli
34
34
  raise ValueError(f"Knowledge base not found: {kb_name}")
35
35
 
36
36
  kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
37
- vector_store_config = {
38
- 'kb_table': kb_table
39
- }
40
- is_sparse = tools_config.pop('is_sparse', None)
41
- vector_size = tools_config.pop('vector_size', None)
37
+ vector_store_config = {"kb_table": kb_table}
38
+ is_sparse = tools_config.pop("is_sparse", None)
39
+ vector_size = tools_config.pop("vector_size", None)
42
40
  if is_sparse is not None:
43
- vector_store_config['is_sparse'] = is_sparse
41
+ vector_store_config["is_sparse"] = is_sparse
44
42
  if vector_size is not None:
45
- vector_store_config['vector_size'] = vector_size
46
- kb_params = {
47
- 'vector_store_config': vector_store_config
48
- }
43
+ vector_store_config["vector_size"] = vector_size
44
+ kb_params = {"vector_store_config": vector_store_config}
49
45
 
50
46
  # Get embedding model from knowledge base table
51
47
  if kb_table._kb.embedding_model:
52
48
  # Extract embedding model args from knowledge base table
53
- embedding_args = kb_table._kb.embedding_model.learn_args.get('using', {})
49
+ embedding_args = kb_table._kb.embedding_model.learn_args.get("using", {})
54
50
  # Construct the embedding model directly
55
51
  embeddings_model = construct_model_from_args(embedding_args)
56
52
  logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
57
53
  else:
58
54
  embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
59
55
  logger.debug("Using default embedding model as knowledge base has no embedding model")
60
- elif 'embedding_model' not in tools_config:
56
+ elif "embedding_model" not in tools_config:
61
57
  embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
62
58
  logger.debug("Using default embedding model as no knowledge base provided")
63
59
 
@@ -75,29 +71,28 @@ def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
75
71
  try:
76
72
  result = rag_pipeline(query)
77
73
  logger.debug(f"RAG pipeline result: {result}")
78
- return result['answer']
74
+ return result["answer"]
79
75
  except Exception as e:
80
- logger.error(f"Error in RAG pipeline: {str(e)}")
81
- logger.error(traceback.format_exc())
76
+ logger.exception("Error in RAG pipeline:")
82
77
  return f"Error in retrieval: {str(e)}"
83
78
 
84
79
  # Create RAG tool
85
- tools_config = tool['config']
80
+ tools_config = tool["config"]
86
81
  tools_config.update(pred_args)
87
82
  return Tool(
88
83
  func=rag_wrapper,
89
- name=tool['name'],
90
- description=tool['description'],
91
- response_format='content',
84
+ name=tool["name"],
85
+ description=tool["description"],
86
+ response_format="content",
92
87
  # Return directly by default since we already use an LLM against retrieved context to generate a response.
93
- return_direct=tools_config.get('return_direct', True)
88
+ return_direct=tools_config.get("return_direct", True),
94
89
  )
95
90
 
96
91
 
97
92
  def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
98
- if 'source' not in tool:
93
+ if "source" not in tool:
99
94
  raise ValueError("Knowledge base for tool not found")
100
- kb_name = tool['source']
95
+ kb_name = tool["source"]
101
96
  executor = skill_tool.get_command_executor()
102
97
  kb = _get_knowledge_base(kb_name, skill.project_id, executor)
103
98
  if not kb:
@@ -110,16 +105,16 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
110
105
 
111
106
  def _get_document_by_name(name: str):
112
107
  if metadata_config.name_column_index is not None:
113
- tsquery_str = ' & '.join(name.split(' '))
108
+ tsquery_str = " & ".join(name.split(" "))
114
109
  documents_response = vector_db_handler.native_query(
115
- f'SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery(\'{tsquery_str}\') LIMIT 1;'
110
+ f"SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery('{tsquery_str}') LIMIT 1;"
116
111
  )
117
112
  else:
118
113
  documents_response = vector_db_handler.native_query(
119
- f'SELECT * FROM {metadata_config.table} WHERE "{metadata_config.name_column}" ILIKE \'%{name}%\' LIMIT 1;'
114
+ f"SELECT * FROM {metadata_config.table} WHERE \"{metadata_config.name_column}\" ILIKE '%{name}%' LIMIT 1;"
120
115
  )
121
116
  if documents_response.resp_type == RESPONSE_TYPE.ERROR:
122
- raise RuntimeError(f'There was an error looking up documents: {documents_response.error_message}')
117
+ raise RuntimeError(f"There was an error looking up documents: {documents_response.error_message}")
123
118
  if documents_response.data_frame.empty:
124
119
  return None
125
120
  document_row = documents_response.data_frame.head(1)
@@ -127,38 +122,36 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
127
122
  id_filter_condition = FilterCondition(
128
123
  f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
129
124
  FilterOperator.EQUAL,
130
- str(document_row.get(metadata_config.id_column).item())
125
+ str(document_row.get(metadata_config.id_column).item()),
131
126
  )
132
127
  document_chunks_df = vector_db_handler.select(
133
- metadata_config.embeddings_table,
134
- conditions=[id_filter_condition]
128
+ metadata_config.embeddings_table, conditions=[id_filter_condition]
135
129
  )
136
130
  if document_chunks_df.empty:
137
131
  return None
138
- sort_col = 'chunk_id' if 'chunk_id' in document_chunks_df.columns else 'id'
132
+ sort_col = "chunk_id" if "chunk_id" in document_chunks_df.columns else "id"
139
133
  document_chunks_df.sort_values(by=sort_col)
140
- content = ''
134
+ content = ""
141
135
  for _, chunk in document_chunks_df.iterrows():
142
136
  if len(content) > metadata_config.max_document_context:
143
137
  break
144
- content += chunk.get(metadata_config.content_column, '')
138
+ content += chunk.get(metadata_config.content_column, "")
145
139
 
146
- return Document(
147
- page_content=content,
148
- metadata=document_row.to_dict(orient='records')[0]
149
- )
140
+ return Document(page_content=content, metadata=document_row.to_dict(orient="records")[0])
150
141
 
151
142
  def _lookup_document_by_name(name: str):
152
143
  found_document = _get_document_by_name(name)
153
144
  if found_document is None:
154
- return f'I could not find any document with name {name}. Please make sure the document name matches exactly.'
145
+ return (
146
+ f"I could not find any document with name {name}. Please make sure the document name matches exactly."
147
+ )
155
148
  return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"
156
149
 
157
150
  return Tool(
158
151
  func=_lookup_document_by_name,
159
- name=tool.get('name', '') + '_name_lookup',
160
- description='You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.',
161
- return_direct=False
152
+ name=tool.get("name", "") + "_name_lookup",
153
+ description="You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.",
154
+ return_direct=False,
162
155
  )
163
156
 
164
157
 
@@ -181,7 +174,7 @@ def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills):
181
174
  try:
182
175
  rag_config = _load_rag_config(tool, pred_args, skill)
183
176
  except Exception as e:
184
- logger.error(f"Error building RAG pipeline: {str(e)}")
177
+ logger.exception("Error building RAG pipeline:")
185
178
  raise ValueError(f"Failed to build RAG pipeline: {str(e)}")
186
179
  tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
187
180
  if rag_config.metadata_config is None:
@@ -274,8 +274,8 @@ class SkillToolController:
274
274
  else:
275
275
  for table_name in response.data_frame.iloc[:, name_idx]:
276
276
  tables_list.append(f"{database}.{escape_table_name(table_name)}")
277
- except Exception as e:
278
- logger.warning(f"Could not get tables from database {database}: {str(e)}")
277
+ except Exception:
278
+ logger.warning(f"Could not get tables from database {database}:", exc_info=True)
279
279
  continue
280
280
 
281
281
  # Handle table restrictions
@@ -100,10 +100,7 @@ class SkillsController:
100
100
  project_name = default_project
101
101
  project = self.project_controller.get(name=project_name)
102
102
 
103
- if not name.islower():
104
- raise ValueError(f"The name must be in lower case: {name}")
105
-
106
- skill = self.get_skill(name, project_name)
103
+ skill = self.get_skill(name, project_name, strict_case=True)
107
104
 
108
105
  if skill is not None:
109
106
  raise ValueError(f"Skill with name already exists: {name}")
@@ -1,7 +1,6 @@
1
1
  import re
2
2
  import csv
3
3
  import inspect
4
- import traceback
5
4
  from io import StringIO
6
5
  from typing import Iterable, List, Optional, Any, Tuple
7
6
  from collections import defaultdict
@@ -254,8 +253,16 @@ class SQLAgent:
254
253
  self.check_table_permission(node)
255
254
  except ValueError as origin_exc:
256
255
  # was it badly quoted by llm?
257
- if len(node.parts) == 1 and node.is_quoted[0] and "." in node.parts[0]:
258
- node2 = Identifier(node.parts[0])
256
+ #
257
+ if "." in node.parts[0]:
258
+ # extract quoted parts (with dots) to sub-parts
259
+ parts = []
260
+ for i, item in enumerate(node.parts):
261
+ if node.is_quoted[i] and "." in item:
262
+ parts.extend(Identifier(item).parts)
263
+ else:
264
+ parts.append(item)
265
+ node2 = Identifier(parts=parts)
259
266
  try:
260
267
  _check_f(node2, is_table=True)
261
268
  return node2
@@ -382,9 +389,9 @@ class SQLAgent:
382
389
  # self._cache.set(cache_key, set(kb_names))
383
390
 
384
391
  return kb_names
385
- except Exception as e:
392
+ except Exception:
386
393
  # If there's an error, log it and return an empty list
387
- logger.error(f"Error in get_usable_knowledge_base_names: {str(e)}")
394
+ logger.exception("Error in get_usable_knowledge_base_names")
388
395
  return []
389
396
 
390
397
  def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
@@ -483,9 +490,9 @@ class SQLAgent:
483
490
  # remove backticks
484
491
  name = name.replace("`", "")
485
492
 
486
- split = name.split(".")
487
- if len(split) > 1:
488
- all_tables.append(Identifier(parts=[split[0], split[-1]]))
493
+ parts = name.split(".")
494
+ if len(parts) > 1:
495
+ all_tables.append(Identifier(parts=parts))
489
496
  else:
490
497
  all_tables.append(Identifier(name))
491
498
 
@@ -526,8 +533,8 @@ class SQLAgent:
526
533
 
527
534
  sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
528
535
  sample_rows_str = "\n" + f"{kb_name}:" + list_to_csv_str(sample_rows)
529
- except Exception as e:
530
- logger.info(f"_get_sample_rows error: {e}")
536
+ except Exception:
537
+ logger.info("_get_sample_rows error:", exc_info=True)
531
538
  sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
532
539
 
533
540
  return sample_rows_str
@@ -560,7 +567,7 @@ class SQLAgent:
560
567
  )
561
568
  ]
562
569
  except Exception as e:
563
- logger.error(f"Failed processing column info for {table_str}: {e}", exc_info=True)
570
+ logger.exception(f"Failed processing column info for {table_str}:")
564
571
  raise ValueError(f"Failed to process column info for {table_str}") from e
565
572
 
566
573
  if not fields:
@@ -569,8 +576,8 @@ class SQLAgent:
569
576
 
570
577
  try:
571
578
  sample_rows_info = self._get_sample_rows(table_str, fields)
572
- except Exception as e:
573
- logger.warning(f"Could not get sample rows for {table_str}: {e}")
579
+ except Exception:
580
+ logger.warning(f"Could not get sample rows for {table_str}:", exc_info=True)
574
581
  sample_rows_info = "\n\t [error] Couldn't retrieve sample rows!"
575
582
 
576
583
  info = f"Table named `{table_str}`:\n"
@@ -585,7 +592,7 @@ class SQLAgent:
585
592
 
586
593
  def _get_sample_rows(self, table: str, fields: List[str]) -> str:
587
594
  logger.info(f"_get_sample_rows: table={table} fields={fields}")
588
- command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
595
+ command = f"select * from {table} limit {self._sample_rows_in_table_info};"
589
596
  try:
590
597
  ret = self._call_engine(command)
591
598
  sample_rows = ret.data.to_lists()
@@ -596,8 +603,8 @@ class SQLAgent:
596
603
 
597
604
  sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
598
605
  sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
599
- except Exception as e:
600
- logger.info(f"_get_sample_rows error: {e}")
606
+ except Exception:
607
+ logger.info("_get_sample_rows error:", exc_info=True)
601
608
  sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
602
609
 
603
610
  return sample_rows_str
@@ -647,7 +654,7 @@ class SQLAgent:
647
654
  logger.info(f"get_table_info_safe: {table_names}")
648
655
  return self.get_table_info(table_names)
649
656
  except Exception as e:
650
- logger.info(f"get_table_info_safe error: {e}")
657
+ logger.info("get_table_info_safe error:", exc_info=True)
651
658
  return f"Error: {e}"
652
659
 
653
660
  def query_safe(self, command: str, fetch: str = "all") -> str:
@@ -655,8 +662,7 @@ class SQLAgent:
655
662
  logger.info(f"query_safe (fetch={fetch}): {command}")
656
663
  return self.query(command, fetch)
657
664
  except Exception as e:
658
- logger.error(f"Error in query_safe: {str(e)}\n{traceback.format_exc()}")
659
- logger.info(f"query_safe error: {e}")
665
+ logger.exception("Error in query_safe:")
660
666
  msg = f"Error: {e}"
661
667
  if "does not exist" in msg and " relation " in msg:
662
668
  msg += "\nAvailable tables: " + ", ".join(self.get_usable_table_names())
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import orjson
2
3
  import datetime
3
4
  from typing import Dict, List, Optional
4
5
 
@@ -47,10 +48,20 @@ def init(connection_str: str = None):
47
48
  global Base, session, engine
48
49
  if connection_str is None:
49
50
  connection_str = config["storage_db"]
51
+ # Use orjson with our CustomJSONEncoder.default for JSON serialization
52
+ _default_json = CustomJSONEncoder().default
53
+
54
+ def _json_serializer(value):
55
+ return orjson.dumps(
56
+ value,
57
+ default=_default_json,
58
+ option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME,
59
+ ).decode("utf-8")
60
+
50
61
  base_args = {
51
62
  "pool_size": 30,
52
63
  "max_overflow": 200,
53
- "json_serializer": CustomJSONEncoder().encode,
64
+ "json_serializer": _json_serializer,
54
65
  }
55
66
  engine = create_engine(connection_str, echo=False, **base_args)
56
67
  session = scoped_session(sessionmaker(bind=engine, autoflush=True))
@@ -534,11 +545,10 @@ class KnowledgeBase(Base):
534
545
  reranking_model = params.pop("reranking_model", None)
535
546
 
536
547
  if not with_secrets:
537
- if embedding_model and "api_key" in embedding_model:
538
- embedding_model["api_key"] = "******"
539
-
540
- if reranking_model and "api_key" in reranking_model:
541
- reranking_model["api_key"] = "******"
548
+ for key in ("api_key", "private_key"):
549
+ for el in (embedding_model, reranking_model):
550
+ if el and key in el:
551
+ el[key] = "******"
542
552
 
543
553
  return {
544
554
  "id": self.id,