MindsDB 25.5.4.2__py3-none-any.whl → 25.6.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (69)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/executor/command_executor.py +69 -14
  5. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  6. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  7. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  8. mindsdb/api/executor/planner/plan_join.py +67 -77
  9. mindsdb/api/executor/planner/query_planner.py +176 -155
  10. mindsdb/api/executor/planner/steps.py +37 -12
  11. mindsdb/api/executor/sql_query/result_set.py +45 -64
  12. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  14. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  15. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  16. mindsdb/api/executor/utilities/sql.py +42 -48
  17. mindsdb/api/http/namespaces/config.py +1 -1
  18. mindsdb/api/http/namespaces/file.py +14 -23
  19. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  22. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  23. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  24. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  25. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  27. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  28. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  29. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  30. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  32. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  33. mindsdb/integrations/libs/api_handler.py +261 -57
  34. mindsdb/integrations/libs/base.py +100 -29
  35. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  36. mindsdb/integrations/utilities/handler_utils.py +23 -8
  37. mindsdb/integrations/utilities/sql_utils.py +35 -40
  38. mindsdb/interfaces/agents/agents_controller.py +196 -192
  39. mindsdb/interfaces/agents/constants.py +7 -1
  40. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  41. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  42. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  43. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  44. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  46. mindsdb/interfaces/database/database.py +81 -57
  47. mindsdb/interfaces/database/integrations.py +220 -234
  48. mindsdb/interfaces/database/log.py +72 -104
  49. mindsdb/interfaces/database/projects.py +156 -193
  50. mindsdb/interfaces/file/file_controller.py +21 -65
  51. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  52. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  53. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  54. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  55. mindsdb/interfaces/skills/skills_controller.py +54 -36
  56. mindsdb/interfaces/skills/sql_agent.py +109 -86
  57. mindsdb/interfaces/storage/db.py +223 -79
  58. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  59. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  60. mindsdb/utilities/config.py +9 -2
  61. mindsdb/utilities/log.py +35 -26
  62. mindsdb/utilities/ml_task_queue/task.py +19 -22
  63. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  64. mindsdb/utilities/starters.py +40 -0
  65. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
  66. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
  67. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  68. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  69. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/sql_query/steps/insert_step.py

@@ -2,23 +2,15 @@ from mindsdb_sql_parser.ast import (
     Identifier,
 )

-from mindsdb.api.executor.planner.steps import (
-    SaveToTable,
-    InsertToTable,
-    CreateTableStep
-)
+from mindsdb.api.executor.planner.steps import SaveToTable, InsertToTable, CreateTableStep
 from mindsdb.api.executor.sql_query.result_set import ResultSet, Column
-from mindsdb.api.executor.exceptions import (
-    NotSupportedYet,
-    LogicError
-)
+from mindsdb.api.executor.exceptions import NotSupportedYet, LogicError
 from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES

 from .base import BaseStepCall


 class InsertToTableCall(BaseStepCall):
-
     bind = InsertToTable

     def call(self, step):
@@ -35,16 +27,16 @@ class InsertToTableCall(BaseStepCall):
             integration_name = step.table.parts[0]
             table_name = Identifier(parts=step.table.parts[1:])
         else:
-            integration_name = self.context['database']
+            integration_name = self.context["database"]
             table_name = step.table

         dn = self.session.datahub.get(integration_name)

-        if hasattr(dn, 'create_table') is False:
+        if hasattr(dn, "create_table") is False:
             raise NotSupportedYet(f"Creating table in '{integration_name}' is not supported")

         if step.dataframe is not None:
-            data = self.steps_data[step.dataframe.result.step_num]
+            data = self.steps_data[step.dataframe.step_num]
         elif step.query is not None:
             data = ResultSet()
             if step.query.columns is None:
@@ -62,7 +54,7 @@ class InsertToTableCall(BaseStepCall):
             for row in step.query.values:
                 record = []
                 for v in row:
-                    if isinstance(v, Identifier) and v.parts[0] == 'None':
+                    if isinstance(v, Identifier) and v.parts[0] == "None":
                         # Allow explicitly inserting NULL values.
                         record.append(None)
                         continue
@@ -72,12 +64,12 @@

             data.add_raw_values(records)
         else:
-            raise LogicError(f'Data not found for insert: {step}')
+            raise LogicError(f"Data not found for insert: {step}")

         # del 'service' columns
-        for col in data.find_columns('__mindsdb_row_id'):
+        for col in data.find_columns("__mindsdb_row_id"):
             data.del_column(col)
-        for col in data.find_columns('__mdb_forecast_offset'):
+        for col in data.find_columns("__mdb_forecast_offset"):
             data.del_column(col)

         # region del columns filtered at projection step
@@ -85,7 +77,7 @@
         if columns_list is not None:
             filtered_column_names = [x.name for x in columns_list]
             for col in data.columns:
-                if col.name.startswith('predictor.'):
+                if col.name.startswith("predictor."):
                     continue
                 if col.name in filtered_column_names:
                     continue
@@ -101,39 +93,27 @@
                 col_names.add(col.alias)

         response = dn.create_table(
-            table_name=table_name,
-            result_set=data,
-            is_replace=is_replace,
-            is_create=is_create,
-            params=step.params
+            table_name=table_name, result_set=data, is_replace=is_replace, is_create=is_create, params=step.params
         )
         return ResultSet(affected_rows=response.affected_rows)


 class SaveToTableCall(InsertToTableCall):
-
     bind = SaveToTable


 class CreateTableCall(BaseStepCall):
-
     bind = CreateTableStep

     def call(self, step):
-
         if len(step.table.parts) > 1:
             integration_name = step.table.parts[0]
             table_name = Identifier(parts=step.table.parts[1:])
         else:
-            integration_name = self.context['database']
+            integration_name = self.context["database"]
             table_name = step.table

         dn = self.session.datahub.get(integration_name)

-        dn.create_table(
-            table_name=table_name,
-            columns=step.columns,
-            is_replace=step.is_replace,
-            is_create=True
-        )
+        dn.create_table(table_name=table_name, columns=step.columns, is_replace=step.is_replace, is_create=True)
         return ResultSet()
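The only behavioral change in this file is the step-result lookup (`step.dataframe.step_num` instead of `step.dataframe.result.step_num`); the rest is quote and line-length normalization. For context, the NULL-handling rule in the values loop can be reproduced standalone; a minimal sketch with hypothetical inputs:

    # Sketch of the conversion rule above: an Identifier whose single part is
    # the string "None" is treated as an explicit NULL marker.
    from mindsdb_sql_parser.ast import Constant, Identifier

    def convert_row(row):
        record = []
        for v in row:
            if isinstance(v, Identifier) and v.parts[0] == "None":
                record.append(None)  # explicit NULL
                continue
            record.append(v.value if isinstance(v, Constant) else v)
        return record

    print(convert_row([Constant(1), Identifier(parts=["None"])]))  # [1, None]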
mindsdb/api/executor/sql_query/steps/subselect_step.py

@@ -2,29 +2,22 @@ from collections import defaultdict

 import pandas as pd

-from mindsdb_sql_parser.ast import (
-    Identifier, Select, Star, Constant, Parameter, Function, Variable, BinaryOperation
-)
+from mindsdb_sql_parser.ast import Identifier, Select, Star, Constant, Parameter, Function, Variable, BinaryOperation

 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SERVER_VARIABLES
-
 from mindsdb.api.executor.planner.step_result import Result
 from mindsdb.api.executor.planner.steps import SubSelectStep, QueryStep
-from mindsdb.integrations.utilities.query_traversal import query_traversal
-
 from mindsdb.api.executor.sql_query.result_set import ResultSet, Column
 from mindsdb.api.executor.utilities.sql import query_df
-
-from mindsdb.interfaces.query_context.context_controller import query_context_controller
-
 from mindsdb.api.executor.exceptions import KeyColumnDoesNotExist
+from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb.interfaces.query_context.context_controller import query_context_controller

 from .base import BaseStepCall
 from .fetch_dataframe import get_fill_param_fnc


 class SubSelectStepCall(BaseStepCall):
-
     bind = SubSelectStep

     def call(self, step):
@@ -32,12 +25,12 @@ class SubSelectStepCall(BaseStepCall):

         table_name = step.table_name
         if table_name is None:
-            table_name = 'df_table'
+            table_name = "df_table"
         else:
             table_name = table_name

         query = step.query
-        query.from_table = Identifier('df_table')
+        query.from_table = Identifier("df_table")

         if step.add_absent_cols and isinstance(query, Select):
             query_cols = set()
@@ -64,6 +57,7 @@
             if isinstance(node, Parameter) and isinstance(node.value, Result):
                 prev_result = self.steps_data[node.value.step_num]
                 return Constant(prev_result.get_column_values(col_idx=0)[0])
+
         query_traversal(query, inject_values)

         df = result.to_df()
@@ -76,7 +70,6 @@


 class QueryStepCall(BaseStepCall):
-
     bind = QueryStep

     def call(self, step: QueryStep):
@@ -103,6 +96,15 @@
             if col.table_name != col.table_alias:
                 tbl_idx[col.table_alias].append(name)

+        lower_col_idx = {}
+        for key, value in col_idx.items():
+            if isinstance(key, int):
+                key = str(key)
+            if isinstance(key, str):
+                lower_col_idx[key.lower()] = value
+                continue
+            lower_col_idx[tuple(str(x).lower() for x in key)] = value
+
         # get aliases of first level
         aliases = []
         for col in query.targets:
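The new `lower_col_idx` is a case-folded mirror of `col_idx`, used further down to resolve unquoted identifiers case-insensitively. The normalization can be exercised on its own; a sketch with hypothetical keys:

    # int keys become strings, string keys are lower-cased, and tuple keys
    # (table, column) are lower-cased element-wise; values are kept as-is.
    col_idx = {1: "a", "Name": "b", ("Tbl", "Col"): "c"}

    lower_col_idx = {}
    for key, value in col_idx.items():
        if isinstance(key, int):
            key = str(key)
        if isinstance(key, str):
            lower_col_idx[key.lower()] = value
            continue
        lower_col_idx[tuple(str(x).lower() for x in key)] = value

    print(lower_col_idx)  # {'1': 'a', 'name': 'b', ('tbl', 'col'): 'c'}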
@@ -120,7 +122,8 @@
                 "user": self.session.username,
                 "version": "8.0.17",
                 "current_schema": "public",
-                "connection_id": self.context.get('connection_id')
+                "schema": "public",
+                "connection_id": self.context.get("connection_id"),
             }
             if function_name in functions_results:
                 return Constant(functions_results[function_name], alias=Identifier(parts=[function_name]))
@@ -144,14 +147,11 @@
             else:
                 # replace with all columns from table
                 table_name = node.parts[-2]
-                return [
-                    Identifier(parts=[col])
-                    for col in tbl_idx.get(table_name, [])
-                ]
+                return [Identifier(parts=[col]) for col in tbl_idx.get(table_name, [])]

             if node.parts[-1].lower() == "session_user":
                 return Constant(self.session.username, alias=node)
-            if node.parts[-1].lower() == '$$':
+            if node.parts[-1].lower() == "$$":
                 # NOTE: sinve version 9.0 mysql client sends query 'select $$'.
                 # Connection can be continued only if answer is parse error.
                 raise ValueError(
@@ -159,23 +159,31 @@
                     "version for the right syntax to use near '$$' at line 1"
                 )

-            if len(node.parts) == 1:
-                key = col_name
-                if key in aliases:
-                    # key is defined as alias
-                    return
-            else:
-                table_name = node.parts[-2]
-                key = (table_name, col_name)
+            match node.parts, node.is_quoted:
+                case [column_name], [column_quoted]:
+                    if column_name in aliases:
+                        # key is defined as alias
+                        return

-            if key not in col_idx:
-                if len(node.parts) == 1:
-                    # it can be local alias of a query
-                    return
+                    key = column_name if column_quoted else column_name.lower()
+
+                    if key not in col_idx and key not in lower_col_idx:
+                        # it can be local alias of a query, like:
+                        # SELECT t1.a + t2.a col1, min(t1.a) c
+                        # FROM dummy_data.tbl1 as t1
+                        # JOIN pg.tbl2 as t2 on t1.c=t2.c
+                        # group by col1
+                        # order by c -- <--- "с" is alias
+                        return
+                case [*_, table_name, column_name], [*_, column_quoted]:
+                    key = (table_name, column_name) if column_quoted else (table_name.lower(), column_name.lower())
+
+            search_idx = col_idx if column_quoted else lower_col_idx

-            raise KeyColumnDoesNotExist(f'Table not found for column: {key}')
+            if key not in search_idx:
+                raise KeyColumnDoesNotExist(f"Table not found for column: {key}")

-            new_name = col_idx[key]
+            new_name = search_idx[key]
             return Identifier(parts=[new_name], alias=node.alias)

         # fill params
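The rewrite replaces the `len(node.parts)` branching with structural pattern matching (Python 3.10+) over `(node.parts, node.is_quoted)`: a quoted identifier keeps its exact case and is resolved against `col_idx`, an unquoted one is lower-cased and resolved against `lower_col_idx`. A reduced sketch of that dispatch, assuming `parts` and `is_quoted` are parallel lists as in the AST:

    def resolve(parts, is_quoted, col_idx, lower_col_idx):
        match parts, is_quoted:
            case [column], [quoted]:
                key = column if quoted else column.lower()
            case [*_, table, column], [*_, quoted]:
                key = (table, column) if quoted else (table.lower(), column.lower())
        # quoted identifiers are matched exactly, unquoted ones case-insensitively
        search_idx = col_idx if quoted else lower_col_idx
        return search_idx.get(key)

    print(resolve(["T1", "Col"], [False, False], {}, {("t1", "col"): "t1.Col"}))  # t1.Col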
@@ -196,14 +204,14 @@
             if key not in col_idx:
                 # exclude
                 node.args = [Constant(0), Constant(0)]
-                node.op = '='
+                node.op = "="

         query_traversal(query.where, remove_not_used_conditions)

         query_traversal(query, check_fields)
         query.where = query_context_controller.remove_lasts(query.where)

-        query.from_table = Identifier('df_table')
+        query.from_table = Identifier("df_table")
         res = query_df(df, query, session=self.session)

         return ResultSet.from_df_cols(df=res, columns_dict=col_names, strict=False)
mindsdb/api/executor/utilities/sql.py

@@ -8,10 +8,7 @@ import numpy as np
 from mindsdb_sql_parser import parse_sql
 from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
 from mindsdb.integrations.utilities.query_traversal import query_traversal
-from mindsdb_sql_parser.ast import (
-    ASTNode, Select, Identifier,
-    Function, Constant
-)
+from mindsdb_sql_parser.ast import ASTNode, Select, Identifier, Function, Constant
 from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier

 from mindsdb.utilities import log
@@ -53,37 +50,38 @@ def get_query_models(query: ASTNode, default_database: str = None) -> List[tuple


 def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_functions=None):
-    ''' Duckdb need to infer column types if column.dtype == object. By default it take 1000 rows,
-        but that may be not sufficient for some cases. This func try to run query multiple times
-        increasing butch size for type infer
+    """Duckdb need to infer column types if column.dtype == object. By default it take 1000 rows,
+    but that may be not sufficient for some cases. This func try to run query multiple times
+    increasing butch size for type infer

-        Args:
-            query_str (str): query to execute
-            dataframes (dict): dataframes
-            user_functions: functions controller which register new functions in connection
+    Args:
+        query_str (str): query to execute
+        dataframes (dict): dataframes
+        user_functions: functions controller which register new functions in connection

-        Returns:
-            pandas.DataFrame
-            pandas.columns
-    '''
+    Returns:
+        pandas.DataFrame
+        pandas.columns
+    """

     for name, value in dataframes.items():
         locals()[name] = value

-    con = duckdb.connect(database=':memory:')
+    con = duckdb.connect(database=":memory:")
     if user_functions:
         user_functions.register(con)

+    exception = None
     for sample_size in [1000, 10000, 1000000]:
         try:
-            con.execute(f'set global pandas_analyze_sample={sample_size};')
+            con.execute(f"set global pandas_analyze_sample={sample_size};")
             result_df = con.execute(query_str).fetchdf()
-        except InvalidInputException:
-            pass
+        except InvalidInputException as e:
+            exception = e
         else:
             break
     else:
-        raise InvalidInputException
+        raise exception
     description = con.description
     con.close()
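Besides the quote normalization, the fallback now remembers the last `InvalidInputException` and re-raises it when every sample size fails, rather than raising the bare exception class. The retry shape is a plain `for`/`else` loop; a minimal standalone sketch against an in-memory duckdb connection:

    import duckdb
    import pandas as pd

    df = pd.DataFrame({"a": list("abc") * 500})  # object-dtype column

    con = duckdb.connect(database=":memory:")
    exception = None
    for sample_size in [1000, 10000, 1000000]:
        try:
            # widen the sample duckdb uses to infer types of object columns
            con.execute(f"set global pandas_analyze_sample={sample_size};")
            result_df = con.execute("select count(*) from df").fetchdf()
        except duckdb.InvalidInputException as e:
            exception = e  # remember the failure, retry with a bigger sample
        else:
            break
    else:
        raise exception  # every sample size failed; surface the real error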
@@ -91,14 +89,14 @@ def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_fun


 def query_df(df, query, session=None):
-    """ Perform simple query ('select' from one table, without subqueries and joins) on DataFrame.
+    """Perform simple query ('select' from one table, without subqueries and joins) on DataFrame.

-        Args:
-            df (pandas.DataFrame): data
-            query (mindsdb_sql_parser.ast.Select | str): select query
+    Args:
+        df (pandas.DataFrame): data
+        query (mindsdb_sql_parser.ast.Select | str): select query

-        Returns:
-            pandas.DataFrame
+    Returns:
+        pandas.DataFrame
     """

     if isinstance(query, str):
@@ -106,14 +104,11 @@ def query_df(df, query, session=None):
     else:
         query_ast = copy.deepcopy(query)

-    if isinstance(query_ast, Select) is False \
-            or isinstance(query_ast.from_table, Identifier) is False:
-        raise Exception(
-            "Only 'SELECT from TABLE' statements supported for internal query"
-        )
+    if isinstance(query_ast, Select) is False or isinstance(query_ast.from_table, Identifier) is False:
+        raise Exception("Only 'SELECT from TABLE' statements supported for internal query")

     table_name = query_ast.from_table.parts[0]
-    query_ast.from_table.parts = ['df']
+    query_ast.from_table.parts = ["df"]

     json_columns = set()
@@ -131,18 +126,18 @@
             return node
         if isinstance(node, Function):
             fnc_name = node.op.lower()
-            if fnc_name == 'database' and len(node.args) == 0:
+            if fnc_name == "database" and len(node.args) == 0:
                 if session is not None:
                     cur_db = session.database
                 else:
                     cur_db = None
                 return Constant(cur_db)
-            elif fnc_name == 'truncate':
+            elif fnc_name == "truncate":
                 # replace mysql 'truncate' function to duckdb 'round'
-                node.op = 'round'
+                node.op = "round"
                 if len(node.args) == 1:
                     node.args.append(0)
-            elif fnc_name == 'json_extract':
+            elif fnc_name == "json_extract":
                 json_columns.add(node.args[0].parts[-1])
             else:
                 if user_functions is not None:
@@ -160,28 +155,27 @@
             except Exception:
                 pass
             return v
+
     for column in json_columns:
         df[column] = df[column].apply(_convert)

-    render = SqlalchemyRender('postgres')
+    render = SqlalchemyRender("postgres")
     try:
         query_str = render.get_string(query_ast, with_failback=False)
     except Exception as e:
-        logger.error(
-            f"Exception during query casting to 'postgres' dialect. Query: {str(query)}. Error: {e}"
-        )
+        logger.error(f"Exception during query casting to 'postgres' dialect. Query: {str(query)}. Error: {e}")
         query_str = render.get_string(query_ast, with_failback=True)

     # workaround to prevent duckdb.TypeMismatchException
     if len(df) > 0:
-        if table_name.lower() in ('models', 'predictors'):
-            if 'TRAINING_OPTIONS' in df.columns:
-                df = df.astype({'TRAINING_OPTIONS': 'string'})
-        if table_name.lower() == 'ml_engines':
-            if 'CONNECTION_DATA' in df.columns:
-                df = df.astype({'CONNECTION_DATA': 'string'})
-
-    result_df, description = query_df_with_type_infer_fallback(query_str, {'df': df}, user_functions=user_functions)
+        if table_name.lower() in ("models", "predictors"):
+            if "TRAINING_OPTIONS" in df.columns:
+                df = df.astype({"TRAINING_OPTIONS": "string"})
+        if table_name.lower() == "ml_engines":
+            if "CONNECTION_DATA" in df.columns:
+                df = df.astype({"CONNECTION_DATA": "string"})
+
+    result_df, description = query_df_with_type_infer_fallback(query_str, {"df": df}, user_functions=user_functions)
     result_df.replace({np.nan: None}, inplace=True)
     result_df.columns = [x[0] for x in description]
     return result_df
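For orientation, `query_df` is the helper the SQL steps above funnel into: it deep-copies the AST, renders it in the postgres dialect, and executes it over the frame via the type-infer fallback. A hedged usage sketch (single-table selects only; joins and subqueries are rejected by design):

    import pandas as pd
    from mindsdb.api.executor.utilities.sql import query_df

    df = pd.DataFrame({"a": [1, 1, 2], "b": [10, 20, 30]})
    res = query_df(df, "select a, sum(b) as total from df group by a")
    print(res)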
mindsdb/api/http/namespaces/config.py

@@ -28,7 +28,7 @@ class GetConfig(Resource):
     def get(self):
         config = Config()
         resp = {"auth": {"http_auth_enabled": config["auth"]["http_auth_enabled"]}}
-        for key in ["default_llm", "default_embedding_model", "default_reranking_model"]:
+        for key in ["default_llm", "default_embedding_model", "default_reranking_model", "a2a"]:
             value = config.get(key)
             if value is not None:
                 resp[key] = value
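With "a2a" added to the allow-list, the config endpoint now echoes the agent-to-agent settings whenever an a2a section is configured. A hypothetical check against a local instance (endpoint path and default HTTP port assumed, not shown in this diff):

    import requests

    resp = requests.get("http://127.0.0.1:47334/api/config").json()
    print(resp.get("a2a"))  # None unless an a2a section is configured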
mindsdb/api/http/namespaces/file.py

@@ -18,6 +18,7 @@ from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities import log
 from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
 from mindsdb.utilities.fs import safe_extract
+from mindsdb.integrations.utilities.files.file_reader import FileProcessingError

 logger = log.getLogger(__name__)
 MAX_FILE_SIZE = 1024 * 1024 * 100  # 100Mb
@@ -26,7 +27,7 @@ MAX_FILE_SIZE = 1024 * 1024 * 100  # 100Mb
 @ns_conf.route("/")
 class FilesList(Resource):
     @ns_conf.doc("get_files_list")
-    @api_endpoint_metrics('GET', '/files')
+    @api_endpoint_metrics("GET", "/files")
     def get(self):
         """List all files"""
         return ca.file_controller.get_files()
@@ -36,7 +37,7 @@
 @ns_conf.param("name", "MindsDB's name for file")
 class File(Resource):
     @ns_conf.doc("put_file")
-    @api_endpoint_metrics('PUT', '/files/file')
+    @api_endpoint_metrics("PUT", "/files/file")
     def put(self, name: str):
         """add new file
         params in FormData:
@@ -105,15 +106,13 @@
         if data.get("source_type") == "url":
             url = data["source"]
             config = Config()
-            allowed_urls = config.get('file_upload_domains', [])
+            allowed_urls = config.get("file_upload_domains", [])
             if allowed_urls and not validate_urls(url, allowed_urls):
                 return http_error(400, "Invalid File URL source.", f"Allowed hosts are: {', '.join(allowed_urls)}.")
             data["file"] = clear_filename(data["name"])
             is_cloud = config.get("cloud", False)
             if is_cloud and is_private_url(url):
-                return http_error(
-                    400, f'URL is private: {url}'
-                )
+                return http_error(400, f"URL is private: {url}")

             if is_cloud is True and ctx.user_class != 1:
                 info = requests.head(url)
@@ -130,14 +129,10 @@
                         "Сan't determine remote file size",
                     )
                 if file_size > MAX_FILE_SIZE:
-                    return http_error(
-                        400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB"
-                    )
+                    return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
             with requests.get(url, stream=True) as r:
                 if r.status_code != 200:
-                    return http_error(
-                        400, "Error getting file", f"Got status code: {r.status_code}"
-                    )
+                    return http_error(400, "Error getting file", f"Got status code: {r.status_code}")
                 file_path = os.path.join(temp_dir_path, data["file"])
                 with open(file_path, "wb") as f:
                     for chunk in r.iter_content(chunk_size=8192):
@@ -158,30 +153,26 @@
                 files = os.listdir(temp_dir_path)
                 if len(files) != 1:
                     os.rmdir(temp_dir_path)
-                    return http_error(
-                        400, "Wrong content.", "Archive must contain only one data file."
-                    )
+                    return http_error(400, "Wrong content.", "Archive must contain only one data file.")
                 file_path = os.path.join(temp_dir_path, files[0])
                 mindsdb_file_name = files[0]
                 if not os.path.isfile(file_path):
                     os.rmdir(temp_dir_path)
-                    return http_error(
-                        400, "Wrong content.", "Archive must contain data file in root."
-                    )
+                    return http_error(400, "Wrong content.", "Archive must contain data file in root.")

             try:
-                ca.file_controller.save_file(
-                    mindsdb_file_name, file_path, file_name=original_file_name
-                )
+                ca.file_controller.save_file(mindsdb_file_name, file_path, file_name=original_file_name)
+            except FileProcessingError as e:
+                return http_error(400, "Error", str(e))
             except Exception as e:
-                return http_error(500, 'Error', str(e))
+                return http_error(500, "Error", str(e))
             finally:
                 shutil.rmtree(temp_dir_path, ignore_errors=True)

             return "", 200

     @ns_conf.doc("delete_file")
-    @api_endpoint_metrics('DELETE', '/files/file')
+    @api_endpoint_metrics("DELETE", "/files/file")
     def delete(self, name: str):
         """delete file"""
mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py

@@ -8,6 +8,7 @@
 * permission of MindsDB Inc
 *******************************************************
 """
+
 import struct

 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import (
@@ -25,16 +26,16 @@ NULL_VALUE_INT = ord(NULL_VALUE)


 class Datum:
-    __slots__ = ['value', 'var_type', 'var_len']
+    __slots__ = ["value", "var_type", "var_len"]

     def __init__(self, var_type, value=None, var_len=None):
         # TODO other types: float, timestamp
         self.value = b""

         if var_len is None:
-            idx = var_type.find('<')
-            var_len = var_type[idx + 1: -1]
-            var_type = var_type[: idx]
+            idx = var_type.find("<")
+            var_len = var_type[idx + 1 : -1]
+            var_type = var_type[:idx]
         self.var_type = var_type
         self.var_len = var_len
@@ -128,7 +129,7 @@
         return self.get_serializer()(self.value)

     def get_serializer(self):
-        if self.var_type == "string":
+        if self.var_type in ("string", "byte"):
             if self.var_len == "lenenc":
                 if isinstance(self.value, bytes):
                     return self.serialize_bytes
@@ -140,15 +141,13 @@
             if self.var_len == "packet":
                 return lambda v: v.get_packet_string()
             else:
-                return lambda v: struct.pack(self.var_len + "s", bytes(v, "utf-8"))[
-                    :int(self.var_len)
-                ]
+                return lambda v: struct.pack(self.var_len + "s", bytes(v, "utf-8"))[: int(self.var_len)]

         if self.var_type == "int":
             if self.var_len == "lenenc":
                 return self.serialize_int
             else:
-                return lambda v: struct.pack("Q", v)[:int(self.var_len)]
+                return lambda v: struct.pack("Q", v)[: int(self.var_len)]

     @classmethod
     def serialize_str_eof(cls, value):
@@ -157,9 +156,7 @@
         if length == 0:
             return b""
         else:
-            return struct.pack(
-                "{len}s".format(len=var_len), bytes(value, "utf-8")
-            )[:length]
+            return struct.pack("{len}s".format(len=var_len), bytes(value, "utf-8"))[:length]

     # def serialize_obj(self, value):
     #     return self.serialize_str(str(value))
@@ -170,7 +167,6 @@

     @classmethod
     def serialize_bytes(cls, value):
-
         val_len = len(value)

         if val_len == 0:
@@ -181,23 +177,11 @@

         byte_count = -(val_len.bit_length() // (-8))
         if byte_count <= 2:
-            return (
-                TWO_BYTE_ENC
-                + struct.pack("H", val_len)
-                + value
-            )
+            return TWO_BYTE_ENC + struct.pack("H", val_len) + value
         if byte_count <= 3:
-            return (
-                THREE_BYTE_ENC
-                + struct.pack("i", val_len)[:3]
-                + value
-            )
+            return THREE_BYTE_ENC + struct.pack("i", val_len)[:3] + value
         if byte_count <= 8:
-            return (
-                THREE_BYTE_ENC
-                + struct.pack("Q", val_len)
-                + value
-            )
+            return THREE_BYTE_ENC + struct.pack("Q", val_len) + value


 def test():
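`serialize_bytes` implements MySQL's length-encoded string framing: the payload is prefixed with its length, and a marker byte signals how many bytes the length itself occupies. A standalone sketch using the standard wire markers (0xfc, 0xfd, 0xfe), which the `TWO_BYTE_ENC`/`THREE_BYTE_ENC` constants presumably map to:

    import struct

    def lenenc_bytes(value: bytes) -> bytes:
        # MySQL length-encoded string: length prefix, then the raw payload
        n = len(value)
        if n < 0xFB:
            return bytes([n]) + value                          # 1-byte length
        if n < 2**16:
            return b"\xfc" + struct.pack("<H", n) + value      # 2-byte length
        if n < 2**24:
            return b"\xfd" + struct.pack("<I", n)[:3] + value  # 3-byte length
        return b"\xfe" + struct.pack("<Q", n) + value          # 8-byte length

    print(lenenc_bytes(b"abc").hex())  # 03616263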