MindsDB 25.5.4.1__py3-none-any.whl → 25.6.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (70)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/a2a/run_a2a.py +1 -1
  5. mindsdb/api/executor/command_executor.py +69 -14
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  7. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  8. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  9. mindsdb/api/executor/planner/plan_join.py +67 -77
  10. mindsdb/api/executor/planner/query_planner.py +176 -155
  11. mindsdb/api/executor/planner/steps.py +37 -12
  12. mindsdb/api/executor/sql_query/result_set.py +45 -64
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  14. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  15. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  16. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  17. mindsdb/api/executor/utilities/sql.py +42 -48
  18. mindsdb/api/http/namespaces/config.py +1 -1
  19. mindsdb/api/http/namespaces/file.py +14 -23
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  22. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  23. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  24. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  25. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  26. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  27. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  28. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  29. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  30. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  32. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  33. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  34. mindsdb/integrations/libs/api_handler.py +261 -57
  35. mindsdb/integrations/libs/base.py +100 -29
  36. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  37. mindsdb/integrations/utilities/handler_utils.py +23 -8
  38. mindsdb/integrations/utilities/sql_utils.py +35 -40
  39. mindsdb/interfaces/agents/agents_controller.py +196 -192
  40. mindsdb/interfaces/agents/constants.py +7 -1
  41. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  42. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  43. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  44. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  46. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  47. mindsdb/interfaces/database/database.py +81 -57
  48. mindsdb/interfaces/database/integrations.py +220 -234
  49. mindsdb/interfaces/database/log.py +72 -104
  50. mindsdb/interfaces/database/projects.py +156 -193
  51. mindsdb/interfaces/file/file_controller.py +21 -65
  52. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  53. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  54. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  55. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  56. mindsdb/interfaces/skills/skills_controller.py +54 -36
  57. mindsdb/interfaces/skills/sql_agent.py +109 -86
  58. mindsdb/interfaces/storage/db.py +223 -79
  59. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  60. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  61. mindsdb/utilities/config.py +9 -2
  62. mindsdb/utilities/log.py +35 -26
  63. mindsdb/utilities/ml_task_queue/task.py +19 -22
  64. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  65. mindsdb/utilities/starters.py +49 -1
  66. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +268 -268
  67. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +70 -62
  68. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  69. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  70. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
 __title__ = "MindsDB"
 __package_name__ = "mindsdb"
-__version__ = "25.5.4.1"
+__version__ = "25.6.2.0"
 __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
 __email__ = "jorge@mindsdb.com"
 __author__ = "MindsDB Inc"
mindsdb/api/a2a/agent.py CHANGED
@@ -28,9 +28,7 @@ class MindsDBAgent:
         self.host = host
         self.port = port
         self.base_url = f"http://{host}:{port}"
-        self.agent_url = (
-            f"{self.base_url}/api/projects/{project_name}/agents/{agent_name}"
-        )
+        self.agent_url = f"{self.base_url}/api/projects/{project_name}/agents/{agent_name}"
         self.sql_url = f"{self.base_url}/api/sql/query"
         logger.info(f"Initialized MindsDB agent connector to {self.base_url}")
 
@@ -65,9 +63,7 @@ class MindsDBAgent:
         for column in ["response", "result", "answer", "completion", "output"]:
             if column in result_row:
                 content = result_row[column]
-                logger.info(
-                    f"Found result in column '{column}': {content[:100]}..."
-                )
+                logger.info(f"Found result in column '{column}': {content[:100]}...")
                 return {
                     "content": content,
                     "parts": [{"type": "text", "text": content}],
@@ -122,9 +118,7 @@ class MindsDBAgent:
             "parts": [{"type": "text", "text": error_msg}],
         }
 
-    def streaming_invoke(
-        self, messages: List[dict], timeout: int = DEFAULT_STREAM_TIMEOUT
-    ) -> Iterator[Dict[str, Any]]:
+    def streaming_invoke(self, messages: List[dict], timeout: int = DEFAULT_STREAM_TIMEOUT) -> Iterator[Dict[str, Any]]:
        """Stream responses from the MindsDB agent using the direct API endpoint.
 
        Args:
@@ -140,15 +134,11 @@ class MindsDBAgent:
         url = f"{self.base_url}/api/projects/{self.project_name}/agents/{self.agent_name}/completions/stream"
 
         # Log request for debugging
-        logger.info(
-            f"Sending streaming request to MindsDB agent: {self.agent_name}"
-        )
+        logger.info(f"Sending streaming request to MindsDB agent: {self.agent_name}")
         logger.debug(f"Request messages: {json.dumps(messages)[:200]}...")
 
         # Send the request to MindsDB streaming API with timeout
-        stream = requests.post(
-            url, json={"messages": messages}, stream=True, timeout=timeout
-        )
+        stream = requests.post(url, json={"messages": messages}, stream=True, timeout=timeout)
         stream.raise_for_status()
 
         # Process the streaming response directly
@@ -165,9 +155,7 @@ class MindsDBAgent:
                         # Pass through the chunk with minimal modifications
                         yield chunk
                     except json.JSONDecodeError as e:
-                        logger.warning(
-                            f"Failed to parse JSON from line: {data}. Error: {str(e)}"
-                        )
+                        logger.warning(f"Failed to parse JSON from line: {data}. Error: {str(e)}")
                         # Yield error information but continue processing
                         yield {
                             "error": f"JSON parse error: {str(e)}",
@@ -186,9 +174,7 @@ class MindsDBAgent:
                     logger.debug(f"Received non-data line: {line}")
 
                     # If it looks like a raw text response (not SSE format), wrap it
-                    if not line.startswith("event:") and not line.startswith(
-                        ":"
-                    ):
+                    if not line.startswith("event:") and not line.startswith(":"):
                         yield {"content": line, "is_task_complete": False}
                 except UnicodeDecodeError as e:
                     logger.warning(f"Failed to decode line: {str(e)}")
@@ -252,13 +238,30 @@ class MindsDBAgent:
         # Send a final completion message
         yield {"is_task_complete": True, "metadata": {"complete": True}}
 
-    async def stream(self, query, session_id) -> AsyncIterable[Dict[str, Any]]:
-        """Stream responses from the MindsDB agent (uses streaming API endpoint)."""
+    async def stream(
+        self,
+        query: str,
+        session_id: str,
+        history: List[dict] | None = None,
+    ) -> AsyncIterable[Dict[str, Any]]:
+        """Stream responses from the MindsDB agent (uses streaming API endpoint).
+
+        Args:
+            query: The current query to send to the agent.
+            session_id: Unique identifier for the conversation session.
+            history: Optional list of previous messages in the conversation.
+
+        Returns:
+            AsyncIterable yielding chunks of the streaming response.
+        """
         try:
             logger.info(f"Using streaming API for query: {query[:100]}...")
 
-            # Format the query into the message structure expected by streaming_invoke
-            messages = [{"question": query, "answer": None}]
+            # Start with history if provided, otherwise empty list
+            messages = history or []
+
+            # Add the current query to the messages
+            messages.append({"question": query, "answer": None})
 
             # Use the streaming_invoke method to get real streaming responses
             streaming_response = self.streaming_invoke(messages)
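
Note: the reworked stream() signature above now threads conversation history into streaming_invoke. A minimal consumption sketch, assuming a running MindsDB instance and keyword arguments matching the constructor in this file (agent name, project, host, and port below are placeholders):

import asyncio

from mindsdb.api.a2a.agent import MindsDBAgent


async def main():
    # Hypothetical arguments; match them to your deployment.
    agent = MindsDBAgent(agent_name="my_agent", project_name="mindsdb", host="127.0.0.1", port=47334)
    # Prior turns are passed through as-is; stream() appends the current question.
    history = [{"question": "What tables exist?", "answer": "demo.sales"}]
    async for chunk in agent.stream("Summarize demo.sales", session_id="abc123", history=history):
        print(chunk)


asyncio.run(main())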
mindsdb/api/a2a/common/server/server.py CHANGED
@@ -20,7 +20,8 @@ from ...common.types import (
 )
 from pydantic import ValidationError
 import json
-from typing import AsyncIterable, Any
+import time
+from typing import AsyncIterable, Any, Dict
 from ...common.server.task_manager import TaskManager
 
 import logging
@@ -44,9 +45,9 @@ class A2AServer:
         self.agent_card = agent_card
         self.app = Starlette()
         self.app.add_route(self.endpoint, self._process_request, methods=["POST"])
-        self.app.add_route(
-            "/.well-known/agent.json", self._get_agent_card, methods=["GET"]
-        )
+        self.app.add_route("/.well-known/agent.json", self._get_agent_card, methods=["GET"])
+        # Add status endpoint
+        self.app.add_route("/status", self._get_status, methods=["GET"])
         # TODO: Remove this when we have a proper CORS policy
         self.app.add_middleware(
             CORSMiddleware,
@@ -55,6 +56,7 @@ class A2AServer:
             allow_methods=["*"],
             allow_headers=["*"],
         )
+        self.start_time = time.time()
 
     def start(self):
         if self.agent_card is None:
@@ -66,18 +68,30 @@ class A2AServer:
         import uvicorn
 
         # Configure uvicorn with optimized settings for streaming
-        uvicorn.run(
-            self.app,
-            host=self.host,
-            port=self.port,
-            http="h11",
-            timeout_keep_alive=65,
-            log_level="info"
-        )
+        uvicorn.run(self.app, host=self.host, port=self.port, http="h11", timeout_keep_alive=65, log_level="info")
 
     def _get_agent_card(self, request: Request) -> JSONResponse:
         return JSONResponse(self.agent_card.model_dump(exclude_none=True))
 
+    def _get_status(self, request: Request) -> JSONResponse:
+        """
+        Status endpoint that returns basic server information.
+        This endpoint can be used by the frontend to check if the A2A server is running.
+        """
+        uptime_seconds = time.time() - self.start_time
+
+        status_info: Dict[str, Any] = {
+            "status": "ok",
+            "service": "mindsdb-a2a",
+            "uptime_seconds": round(uptime_seconds, 2),
+            "host": self.host,
+            "port": self.port,
+            "agent_name": self.agent_card.name if self.agent_card else None,
+            "version": self.agent_card.version if self.agent_card else "unknown",
+        }
+
+        return JSONResponse(status_info)
+
     async def _process_request(self, request: Request):
         try:
             body = await request.json()
@@ -89,23 +103,15 @@ class A2AServer:
                 result = await self.task_manager.on_send_task(json_rpc_request)
             elif isinstance(json_rpc_request, SendTaskStreamingRequest):
                 # Don't await the async generator, just pass it to _create_response
-                result = self.task_manager.on_send_task_subscribe(
-                    json_rpc_request
-                )
+                result = self.task_manager.on_send_task_subscribe(json_rpc_request)
             elif isinstance(json_rpc_request, CancelTaskRequest):
                 result = await self.task_manager.on_cancel_task(json_rpc_request)
             elif isinstance(json_rpc_request, SetTaskPushNotificationRequest):
-                result = await self.task_manager.on_set_task_push_notification(
-                    json_rpc_request
-                )
+                result = await self.task_manager.on_set_task_push_notification(json_rpc_request)
             elif isinstance(json_rpc_request, GetTaskPushNotificationRequest):
-                result = await self.task_manager.on_get_task_push_notification(
-                    json_rpc_request
-                )
+                result = await self.task_manager.on_get_task_push_notification(json_rpc_request)
             elif isinstance(json_rpc_request, TaskResubscriptionRequest):
-                result = await self.task_manager.on_resubscribe_to_task(
-                    json_rpc_request
-                )
+                result = await self.task_manager.on_resubscribe_to_task(json_rpc_request)
             else:
                 logger.warning(f"Unexpected request type: {type(json_rpc_request)}")
                 raise ValueError(f"Unexpected request type: {type(request)}")
@@ -152,10 +158,10 @@ class A2AServer:
                     "X-Accel-Buffering": "no",
                     "Connection": "keep-alive",
                     "Content-Type": "text/event-stream",
-                    "Transfer-Encoding": "chunked"
+                    "Transfer-Encoding": "chunked",
                 },
                 # Explicitly set media_type
-                media_type="text/event-stream"
+                media_type="text/event-stream",
             )
         elif isinstance(result, JSONRPCResponse):
             return JSONResponse(result.model_dump(exclude_none=True))
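
Note: the new /status route is a lightweight liveness probe. A hedged client-side sketch (the port is an assumption; use whatever the A2A server was started with):

import requests

resp = requests.get("http://127.0.0.1:10002/status", timeout=5)
resp.raise_for_status()
info = resp.json()
# Fields mirror status_info in _get_status() above.
print(info["status"], info["uptime_seconds"], info.get("agent_name"))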
mindsdb/api/a2a/run_a2a.py CHANGED
@@ -57,7 +57,7 @@ def main(config_override: Optional[Dict[str, Any]] = None, *args, **kwargs):
     logger.info("Successfully imported a2a module")
 
     # Get configuration from config system or use provided override
-    a2a_config = config_override if config_override is not None else config.get("a2a", {})
+    a2a_config = config_override if config_override is not None else config.get("api", {}).get("a2a", {})
 
     # Set log level if specified
     if a2a_config.get("log_level"):
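
Note: A2A settings now live under the top-level "api" key, so config overrides need the nested shape. An illustrative sketch of the lookup change (the dict below is a stand-in, not the full MindsDB config schema):

config = {"api": {"a2a": {"log_level": "DEBUG"}}}

old = config.get("a2a", {})                 # 25.5.x lookup: misses the nested section
new = config.get("api", {}).get("a2a", {})  # 25.6.x lookup: finds it

assert old == {} and new == {"log_level": "DEBUG"}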
mindsdb/api/executor/command_executor.py CHANGED
@@ -6,6 +6,7 @@ from functools import reduce
 
 import pandas as pd
 from mindsdb_sql_parser import parse_sql
+from mindsdb_sql_parser.ast.mindsdb import AlterDatabase
 from mindsdb_sql_parser.ast import (
     Alter,
     ASTNode,
@@ -39,6 +40,7 @@ from mindsdb_sql_parser.ast import (
 
 # typed models
 from mindsdb_sql_parser.ast.mindsdb import (
+    AlterView,
     CreateAgent,
     CreateAnomalyDetectionModel,
     CreateChatBot,
@@ -51,6 +53,7 @@ from mindsdb_sql_parser.ast.mindsdb import (
     CreateTrigger,
     CreateView,
     CreateKnowledgeBaseIndex,
+    EvaluateKnowledgeBase,
     DropAgent,
     DropChatBot,
     DropDatasource,
@@ -189,6 +192,8 @@ class ExecuteCommands:
             return self.answer_drop_tables(statement, database_name)
         elif statement_type is DropDatasource or statement_type is DropDatabase:
             return self.answer_drop_database(statement)
+        elif statement_type is AlterDatabase:
+            return self.answer_alter_database(statement)
         elif statement_type is Describe:
             # NOTE in sql 'describe table' is same as 'show columns'
             obj_type = statement.type
@@ -551,7 +556,9 @@ class ExecuteCommands:
         ):
             return self.answer_create_predictor(statement, database_name)
         elif statement_type is CreateView:
-            return self.answer_create_view(statement, database_name)
+            return self.answer_create_or_alter_view(statement, database_name)
+        elif statement_type is AlterView:
+            return self.answer_create_or_alter_view(statement, database_name)
         elif statement_type is DropView:
             return self.answer_drop_view(statement, database_name)
         elif statement_type is Delete:
@@ -618,6 +625,8 @@ class ExecuteCommands:
             return self.answer_evaluate_metric(statement, database_name)
         elif statement_type is CreateKnowledgeBaseIndex:
             return self.answer_create_kb_index(statement, database_name)
+        elif statement_type is EvaluateKnowledgeBase:
+            return self.answer_evaluate_kb(statement, database_name)
         else:
             logger.warning(f"Unknown SQL statement: {sql}")
             raise NotSupportedYet(f"Unknown SQL statement: {sql}")
@@ -906,6 +915,14 @@ class ExecuteCommands:
         self.session.kb_controller.create_index(table_name=table_name, project_name=project_name)
         return ExecuteAnswer()
 
+    def answer_evaluate_kb(self, statement: EvaluateKnowledgeBase, database_name):
+        table_name = statement.name.parts[-1]
+        project_name = statement.name.parts[0] if len(statement.name.parts) > 1 else database_name
+        scores = self.session.kb_controller.evaluate(
+            table_name=table_name, project_name=project_name, params=statement.params
+        )
+        return ExecuteAnswer(data=ResultSet.from_df(scores))
+
     def _get_model_info(self, identifier, except_absent=True, database_name=None):
         if len(identifier.parts) == 1:
             identifier.parts = [database_name, identifier.parts[0]]
@@ -1181,6 +1198,13 @@ class ExecuteCommands:
                 raise
         return ExecuteAnswer()
 
+    def answer_alter_database(self, statement):
+        if len(statement.name.parts) != 1:
+            raise Exception("Database name should contain only 1 part.")
+        db_name = statement.name.parts[0]
+        self.session.database_controller.update(db_name, data=statement.params)
+        return ExecuteAnswer()
+
     def answer_drop_tables(self, statement, database_name):
         """answer on 'drop table [if exists] {name}'
         Args:
@@ -1214,17 +1238,35 @@ class ExecuteCommands:
 
         return ExecuteAnswer()
 
-    def answer_create_view(self, statement, database_name):
+    def answer_create_or_alter_view(self, statement: ASTNode, database_name: str) -> ExecuteAnswer:
+        """Process CREATE and ALTER VIEW commands
+
+        Args:
+            statement (ASTNode): data for creating or altering view
+            database_name (str): name of the current database
+
+        Returns:
+            ExecuteAnswer: answer for the command
+        """
         project_name = database_name
-        # TEMP
-        if isinstance(statement.name, Identifier):
+
+        if isinstance(statement.name, str):
+            parts = statement.name.split(".")
+        elif isinstance(statement.name, Identifier):
             parts = statement.name.parts
         else:
-            parts = statement.name.split(".")
+            raise ValueError(f"Unknown type of view name: {statement.name}")
 
-        view_name = parts[-1]
-        if len(parts) == 2:
-            project_name = parts[0]
+        match parts:
+            case [project_name, view_name]:
+                pass
+            case [view_name]:
+                pass
+            case _:
+                raise ValueError(
+                    'View name should be in the form "project_name.view_name" '
+                    f'or "view_name", got {statement.name.parts}'
+                )
 
         query_str = statement.query_str
 
@@ -1233,7 +1275,7 @@ class ExecuteCommands:
                 targets=[Star()],
                 from_table=NativeQuery(integration=statement.from_table, query=statement.query_str),
             )
-            query_str = str(query)
+            query_str = query.to_string()
         else:
             query = parse_sql(query_str)
 
@@ -1248,11 +1290,21 @@ class ExecuteCommands:
             query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
 
         project = self.session.database_controller.get_project(project_name)
-        try:
-            project.create_view(view_name, query=query_str)
-        except EntityExistsError:
-            if getattr(statement, "if_not_exists", False) is False:
-                raise
+
+        if isinstance(statement, CreateView):
+            try:
+                project.create_view(view_name, query=query_str)
+            except EntityExistsError:
+                if getattr(statement, "if_not_exists", False) is False:
+                    raise
+        elif isinstance(statement, AlterView):
+            try:
+                project.update_view(view_name, query=query_str)
+            except EntityNotExistsError:
+                raise ExecutorException(f"View {view_name} does not exist in {project_name}")
+        else:
+            raise ValueError(f"Unknown view DDL statement: {statement}")
+
         return ExecuteAnswer()
 
     def answer_drop_view(self, statement, database_name):
@@ -1467,6 +1519,9 @@ class ExecuteCommands:
         is_full=False,
         database_name=None,
     ):
+        if isinstance(target, Identifier) is False:
+            raise TableNotExistError("The table name is required for the query.")
+
         if len(target.parts) > 1:
             db = target.parts[0]
         elif isinstance(database_name, str) and len(database_name) > 0:
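
Note: the dispatcher above routes the new AlterDatabase, AlterView, and EvaluateKnowledgeBase statement types. A hedged sketch of how such a statement reaches the executor (the SQL text is illustrative; the exact grammar is defined by mindsdb_sql_parser, and whether this form parses to AlterView is an assumption):

from mindsdb_sql_parser import parse_sql

# Illustrative statement; adjust to the grammar your parser version accepts.
stmt = parse_sql("ALTER VIEW my_project.my_view AS (SELECT * FROM demo.sales)")
# ExecuteCommands would route AlterView (like CreateView) to answer_create_or_alter_view(),
# which calls project.update_view() for ALTER and project.create_view() for CREATE.
print(type(stmt).__name__)  # expected: AlterView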
mindsdb/api/executor/datahub/datanodes/integration_datanode.py CHANGED
@@ -1,13 +1,11 @@
 import time
 import inspect
 from dataclasses import astuple
-from typing import Iterable
+from typing import Iterable, List
 
 import numpy as np
 import pandas as pd
-from sqlalchemy.types import (
-    Integer, Float
-)
+from sqlalchemy.types import Integer, Float
 
 from mindsdb_sql_parser.ast.base import ASTNode
 from mindsdb_sql_parser.ast import Insert, Identifier, CreateTable, TableColumn, DropTables
@@ -32,7 +30,7 @@ class DBHandlerException(Exception):
 
 
 class IntegrationDataNode(DataNode):
-    type = 'integration'
+    type = "integration"
 
     def __init__(self, integration_name, ds_type, integration_controller):
         self.integration_name = integration_name
@@ -46,15 +44,17 @@ class IntegrationDataNode(DataNode):
     def get_tables(self):
         response = self.integration_handler.get_tables()
         if response.type == RESPONSE_TYPE.TABLE:
-            result_dict = response.data_frame.to_dict(orient='records')
+            result_dict = response.data_frame.to_dict(orient="records")
             result = []
             for row in result_dict:
-
                 result.append(TablesRow.from_dict(row))
             return result
         else:
             raise Exception(f"Can't get tables: {response.error_message}")
 
+        result_dict = response.data_frame.to_dict(orient="records")
+        return [TablesRow.from_dict(row) for row in result_dict]
+
     def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame:
         """Get a DataFrame containing representation of information_schema.columns for the specified table.
 
@@ -66,7 +66,7 @@ class IntegrationDataNode(DataNode):
             pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table.
                 The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES.
         """
-        if 'schema_name' in inspect.signature(self.integration_handler.get_columns).parameters:
+        if "schema_name" in inspect.signature(self.integration_handler.get_columns).parameters:
             response = self.integration_handler.get_columns(table_name, schema_name)
         else:
             response = self.integration_handler.get_columns(table_name)
@@ -81,18 +81,18 @@ class IntegrationDataNode(DataNode):
         # region fallback for old handlers
         df = response.data_frame
         df.columns = [name.upper() for name in df.columns]
-        if 'FIELD' not in df.columns or 'TYPE' not in df.columns:
+        if "FIELD" not in df.columns or "TYPE" not in df.columns:
             logger.warning(
                 f"Response from the handler's `get_columns` call does not contain required columns: f{df.columns}"
             )
             return pd.DataFrame([], columns=astuple(INF_SCHEMA_COLUMNS_NAMES))
 
-        new_df = df[['FIELD', 'TYPE']]
-        new_df.columns = ['COLUMN_NAME', 'DATA_TYPE']
+        new_df = df[["FIELD", "TYPE"]]
+        new_df.columns = ["COLUMN_NAME", "DATA_TYPE"]
 
-        new_df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = new_df[
-            INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE
-        ].apply(lambda x: infer_mysql_type(x).value)
+        new_df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = new_df[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE].apply(
+            lambda x: infer_mysql_type(x).value
+        )
 
         for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):
             if column_name in new_df.columns:
@@ -116,54 +116,50 @@ class IntegrationDataNode(DataNode):
         return df[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME].to_list()
 
     def drop_table(self, name: Identifier, if_exists=False):
-        drop_ast = DropTables(
-            tables=[name],
-            if_exists=if_exists
-        )
+        drop_ast = DropTables(tables=[name], if_exists=if_exists)
         self.query(drop_ast)
 
-    def create_table(self, table_name: Identifier, result_set: ResultSet = None, columns=None,
-                     is_replace=False, is_create=False, **kwargs) -> DataHubResponse:
+    def create_table(
+        self,
+        table_name: Identifier,
+        result_set: ResultSet = None,
+        columns: List[TableColumn] = None,
+        is_replace: bool = False,
+        is_create: bool = False,
+        raise_if_exists: bool = True,
+        **kwargs,
+    ) -> DataHubResponse:
         # is_create - create table
+        # if !raise_if_exists: error will be skipped
         # is_replace - drop table if exists
         # is_create==False and is_replace==False: just insert
 
         table_columns_meta = {}
 
         if columns is None:
-            columns = []
-
-            df = result_set.get_raw_df()
-
             columns: list[TableColumn] = result_set.get_ast_columns()
-            table_columns_meta = {
-                column.name: column.type
-                for column in columns
-            }
+            table_columns_meta = {column.name: column.type for column in columns}
 
         if is_replace:
             # drop
-            drop_ast = DropTables(
-                tables=[table_name],
-                if_exists=True
-            )
+            drop_ast = DropTables(tables=[table_name], if_exists=True)
             self.query(drop_ast)
             is_create = True
 
         if is_create:
-            create_table_ast = CreateTable(
-                name=table_name,
-                columns=columns,
-                is_replace=is_replace
-            )
-            self.query(create_table_ast)
+            create_table_ast = CreateTable(name=table_name, columns=columns, is_replace=is_replace)
+            try:
+                self.query(create_table_ast)
+            except Exception as e:
+                if raise_if_exists:
+                    raise e
 
         if result_set is None:
             # it is just a 'create table'
             return DataHubResponse()
 
         # native insert
-        if hasattr(self.integration_handler, 'insert'):
+        if hasattr(self.integration_handler, "insert"):
             df = result_set.to_df()
 
             result: HandlerResponse = self.integration_handler.insert(table_name.parts[-1], df)
@@ -176,9 +172,9 @@ class IntegrationDataNode(DataNode):
                 column_type = table_columns_meta[col.alias]
 
                 if column_type == Integer:
-                    type_name = 'int'
+                    type_name = "int"
                 elif column_type == Float:
-                    type_name = 'float'
+                    type_name = "float"
                 else:
                     continue
 
@@ -193,24 +189,19 @@ class IntegrationDataNode(DataNode):
             # not need to insert
             return DataHubResponse()
 
-        insert_ast = Insert(
-            table=table_name,
-            columns=insert_columns,
-            values=values,
-            is_plain=True
-        )
+        insert_ast = Insert(table=table_name, columns=insert_columns, values=values, is_plain=True)
 
         try:
             result: DataHubResponse = self.query(insert_ast)
         except Exception as e:
-            msg = f'[{self.ds_type}/{self.integration_name}]: {str(e)}'
+            msg = f"[{self.ds_type}/{self.integration_name}]: {str(e)}"
             raise DBHandlerException(msg) from e
 
         return DataHubResponse(affected_rows=result.affected_rows)
 
     def has_support_stream(self) -> bool:
         # checks if data handler has query_stream method
-        return hasattr(self.integration_handler, 'query_stream') and callable(self.integration_handler.query_stream)
+        return hasattr(self.integration_handler, "query_stream") and callable(self.integration_handler.query_stream)
 
     @profiler.profile()
     def query_stream(self, query: ASTNode, fetch_size: int = None) -> Iterable:
@@ -230,24 +221,26 @@ class IntegrationDataNode(DataNode):
             # metrics
             elapsed_seconds = time.perf_counter() - time_before_query
             query_time_with_labels = metrics.INTEGRATION_HANDLER_QUERY_TIME.labels(
-                get_class_name(self.integration_handler), result.type)
+                get_class_name(self.integration_handler), result.type
+            )
             query_time_with_labels.observe(elapsed_seconds)
 
             num_rows = 0
             if result.data_frame is not None:
                 num_rows = len(result.data_frame.index)
             response_size_with_labels = metrics.INTEGRATION_HANDLER_RESPONSE_SIZE.labels(
-                get_class_name(self.integration_handler), result.type)
+                get_class_name(self.integration_handler), result.type
+            )
             response_size_with_labels.observe(num_rows)
         except Exception as e:
             msg = str(e).strip()
-            if msg == '':
+            if msg == "":
                 msg = e.__class__.__name__
-            msg = f'[{self.ds_type}/{self.integration_name}]: {msg}'
+            msg = f"[{self.ds_type}/{self.integration_name}]: {msg}"
             raise DBHandlerException(msg) from e
 
         if result.type == RESPONSE_TYPE.ERROR:
-            raise Exception(f'Error in {self.integration_name}: {result.error_message}')
+            raise Exception(f"Error in {self.integration_name}: {result.error_message}")
         if result.type == RESPONSE_TYPE.OK:
             return DataHubResponse(affected_rows=result.affected_rows)
 
@@ -265,17 +258,8 @@ class IntegrationDataNode(DataNode):
             logger.error(f"Issue with clearing DF from NaN values: {e}")
         # endregion
 
-        columns_info = [
-            {
-                'name': k,
-                'type': v
-            }
-            for k, v in df.dtypes.items()
-        ]
+        columns_info = [{"name": k, "type": v} for k, v in df.dtypes.items()]
 
         return DataHubResponse(
-            data_frame=df,
-            columns=columns_info,
-            affected_rows=result.affected_rows,
-            mysql_types=result.mysql_types
+            data_frame=df, columns=columns_info, affected_rows=result.affected_rows, mysql_types=result.mysql_types
         )
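
Note: the new raise_if_exists flag lets create_table() tolerate a pre-existing table and fall through to the insert. A hedged usage sketch (node and result_set are assumed to be an already-initialized IntegrationDataNode and a populated ResultSet; constructing them is elided):

from mindsdb_sql_parser.ast import Identifier

# `node` is assumed to be an IntegrationDataNode wired to a live handler.
response = node.create_table(
    table_name=Identifier(parts=["my_schema", "my_table"]),
    result_set=result_set,
    is_create=True,
    raise_if_exists=False,  # swallow "table already exists" and proceed to insert rows
)
print(response.affected_rows)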