MindsDB 25.4.3.2__py3-none-any.whl → 25.4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic.

Files changed (68)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +18 -4
  3. mindsdb/api/executor/command_executor.py +12 -2
  4. mindsdb/api/executor/data_types/response_type.py +1 -0
  5. mindsdb/api/executor/datahub/classes/tables_row.py +3 -10
  6. mindsdb/api/executor/datahub/datanodes/datanode.py +7 -2
  7. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +44 -10
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +57 -38
  9. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +2 -1
  10. mindsdb/api/executor/datahub/datanodes/project_datanode.py +39 -7
  11. mindsdb/api/executor/datahub/datanodes/system_tables.py +116 -109
  12. mindsdb/api/executor/planner/query_plan.py +1 -0
  13. mindsdb/api/executor/planner/query_planner.py +15 -1
  14. mindsdb/api/executor/planner/steps.py +8 -2
  15. mindsdb/api/executor/sql_query/sql_query.py +24 -8
  16. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +25 -8
  17. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +4 -2
  18. mindsdb/api/executor/sql_query/steps/insert_step.py +2 -1
  19. mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -3
  20. mindsdb/api/http/namespaces/config.py +19 -11
  21. mindsdb/api/litellm/start.py +82 -0
  22. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +133 -0
  23. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +7 -2
  24. mindsdb/integrations/handlers/chromadb_handler/settings.py +1 -0
  25. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +13 -4
  26. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +14 -5
  27. mindsdb/integrations/handlers/openai_handler/helpers.py +3 -5
  28. mindsdb/integrations/handlers/openai_handler/openai_handler.py +20 -8
  29. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +14 -4
  30. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +34 -19
  31. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +21 -18
  32. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +14 -4
  33. mindsdb/integrations/handlers/togetherai_handler/__about__.py +9 -0
  34. mindsdb/integrations/handlers/togetherai_handler/__init__.py +20 -0
  35. mindsdb/integrations/handlers/togetherai_handler/creation_args.py +14 -0
  36. mindsdb/integrations/handlers/togetherai_handler/icon.svg +15 -0
  37. mindsdb/integrations/handlers/togetherai_handler/model_using_args.py +5 -0
  38. mindsdb/integrations/handlers/togetherai_handler/requirements.txt +2 -0
  39. mindsdb/integrations/handlers/togetherai_handler/settings.py +33 -0
  40. mindsdb/integrations/handlers/togetherai_handler/togetherai_handler.py +234 -0
  41. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +1 -1
  42. mindsdb/integrations/libs/response.py +80 -32
  43. mindsdb/integrations/utilities/handler_utils.py +4 -0
  44. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +360 -0
  45. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +8 -153
  46. mindsdb/interfaces/agents/litellm_server.py +345 -0
  47. mindsdb/interfaces/agents/mcp_client_agent.py +252 -0
  48. mindsdb/interfaces/agents/run_mcp_agent.py +205 -0
  49. mindsdb/interfaces/functions/controller.py +3 -2
  50. mindsdb/interfaces/knowledge_base/controller.py +106 -82
  51. mindsdb/interfaces/query_context/context_controller.py +55 -15
  52. mindsdb/interfaces/query_context/query_task.py +19 -0
  53. mindsdb/interfaces/skills/skill_tool.py +7 -1
  54. mindsdb/interfaces/skills/sql_agent.py +8 -3
  55. mindsdb/interfaces/storage/db.py +2 -2
  56. mindsdb/interfaces/tasks/task_monitor.py +5 -1
  57. mindsdb/interfaces/tasks/task_thread.py +6 -0
  58. mindsdb/migrations/versions/2025-04-22_53502b6d63bf_query_database.py +27 -0
  59. mindsdb/utilities/config.py +20 -2
  60. mindsdb/utilities/context.py +1 -0
  61. mindsdb/utilities/starters.py +7 -0
  62. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.5.0.dist-info}/METADATA +226 -221
  63. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.5.0.dist-info}/RECORD +67 -53
  64. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.5.0.dist-info}/WHEEL +1 -1
  65. mindsdb/integrations/handlers/snowflake_handler/tests/test_snowflake_handler.py +0 -230
  66. /mindsdb/{integrations/handlers/snowflake_handler/tests → api/litellm}/__init__.py +0 -0
  67. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.5.0.dist-info}/licenses/LICENSE +0 -0
  68. {mindsdb-25.4.3.2.dist-info → mindsdb-25.4.5.0.dist-info}/top_level.txt +0 -0
--- a/mindsdb/api/executor/datahub/datanodes/system_tables.py
+++ b/mindsdb/api/executor/datahub/datanodes/system_tables.py
@@ -1,5 +1,5 @@
 from typing import Optional, Literal
-from dataclasses import dataclass, astuple, fields
+from dataclasses import dataclass, fields
 
 import pandas as pd
 from mindsdb_sql_parser.ast.base import ASTNode
@@ -7,11 +7,10 @@ from mindsdb_sql_parser.ast.base import ASTNode
 from mindsdb.utilities import log
 from mindsdb.utilities.config import config
 from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
-from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
-from mindsdb.api.executor.datahub.classes.tables_row import (
-    TABLES_ROW_TYPE,
-    TablesRow,
-)
+from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
+from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE, MYSQL_DATA_TYPE_COLUMNS_DEFAULT
+from mindsdb.api.executor.datahub.classes.tables_row import TABLES_ROW_TYPE, TablesRow
+
 
 logger = log.getLogger(__name__)
 
@@ -165,11 +164,36 @@ class TablesTable(Table):
         return df
 
 
-@dataclass
+def infer_mysql_type(original_type: str) -> MYSQL_DATA_TYPE:
+    """Infer the MySQL data type from a database's original type string.
+
+    Args:
+        original_type (str): The original type string from a database.
+
+    Returns:
+        MYSQL_DATA_TYPE: The inferred MySQL data type.
+    """
+    match original_type.lower():
+        case 'double precision' | 'real' | 'numeric' | 'float':
+            data_type = MYSQL_DATA_TYPE.FLOAT
+        case 'integer' | 'smallint' | 'int' | 'bigint':
+            data_type = MYSQL_DATA_TYPE.BIGINT
+        case 'timestamp without time zone' | 'timestamp with time zone' | 'date' | 'timestamp':
+            data_type = MYSQL_DATA_TYPE.DATETIME
+        case _:
+            data_type = MYSQL_DATA_TYPE.VARCHAR
+    return data_type
+
+
+@dataclass(slots=True, kw_only=True)
 class ColumnsTableRow:
-    """Represents a row in the COLUMNS table.
-    Fields description: https://dev.mysql.com/doc/refman/8.4/en/information-schema-columns-table.html
-    NOTE: attrs order matter, don't change it.
+    """Represents a row in MindsDB's internal INFORMATION_SCHEMA.COLUMNS table.
+    This class follows the MySQL-compatible COLUMNS table structure.
+
+    Detailed field descriptions can be found in the MySQL documentation:
+    https://dev.mysql.com/doc/refman/8.4/en/information-schema-columns-table.html
+
+    NOTE: The order of attributes is significant and matches the MySQL column order.
     """
     TABLE_CATALOG: Literal['def'] = 'def'
     TABLE_SCHEMA: Optional[str] = None
@@ -192,73 +216,81 @@ class ColumnsTableRow:
     PRIVILEGES: str = 'select'
     COLUMN_COMMENT: Optional[str] = None
     GENERATION_EXPRESSION: Optional[str] = None
+    SRS_ID: Optional[str] = None
+    # MindsDB-specific columns:
+    ORIGINAL_TYPE: Optional[str] = None
+
+    @classmethod
+    def from_is_columns_row(cls, table_schema: str, table_name: str, row: pd.Series) -> 'ColumnsTableRow':
+        """Transform a row from the response of `handler.get_columns(...)` into an
+        internal information_schema.columns row.
+
+        Args:
+            table_schema (str): The name of the schema of the table whose columns are described.
+            table_name (str): The name of the table whose columns are described.
+            row (pd.Series): A row from the response of `handler.get_columns(...)`.
+
+        Returns:
+            ColumnsTableRow: A row in MindsDB's internal INFORMATION_SCHEMA.COLUMNS table.
+        """
+        original_type: str = row[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE] or ''
+        data_type: MYSQL_DATA_TYPE | None = row[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE]
+        if isinstance(data_type, MYSQL_DATA_TYPE) is False:
+            data_type = infer_mysql_type(original_type)
+
+        # region set default values depending on type
+        defaults = MYSQL_DATA_TYPE_COLUMNS_DEFAULT.get(data_type)
+        if defaults is not None:
+            for key, value in defaults.items():
+                if key in row and row[key] is None:
+                    row[key] = value
+        # endregion
+
+        # region determine COLUMN_TYPE - a text representation of DATA_TYPE with additional attributes
+        match data_type:
+            case MYSQL_DATA_TYPE.DECIMAL:
+                column_type = f'decimal({row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION]},{row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE]})'
+            case MYSQL_DATA_TYPE.VARCHAR:
+                column_type = f'varchar({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})'
+            case MYSQL_DATA_TYPE.VARBINARY:
+                column_type = f'varbinary({row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH]})'
+            case MYSQL_DATA_TYPE.BIT | MYSQL_DATA_TYPE.BINARY | MYSQL_DATA_TYPE.CHAR:
+                column_type = f'{data_type.value.lower()}(1)'
+            case MYSQL_DATA_TYPE.BOOL | MYSQL_DATA_TYPE.BOOLEAN:
+                column_type = 'tinyint(1)'
+            case _:
+                column_type = data_type.value.lower()
+        # endregion
+
+        # BOOL/BOOLEAN types have DATA_TYPE 'tinyint' in MySQL
+        if data_type in (MYSQL_DATA_TYPE.BOOL, MYSQL_DATA_TYPE.BOOLEAN):
+            data_type = 'tinyint'
+        else:
+            data_type = data_type.value.lower()
+
+        return cls(
+            TABLE_SCHEMA=table_schema,
+            TABLE_NAME=table_name,
+            COLUMN_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME],
+            ORDINAL_POSITION=row[INF_SCHEMA_COLUMNS_NAMES.ORDINAL_POSITION],
+            COLUMN_DEFAULT=row[INF_SCHEMA_COLUMNS_NAMES.COLUMN_DEFAULT],
+            IS_NULLABLE=row[INF_SCHEMA_COLUMNS_NAMES.IS_NULLABLE],
+            DATA_TYPE=data_type,
+            CHARACTER_MAXIMUM_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_MAXIMUM_LENGTH],
+            CHARACTER_OCTET_LENGTH=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_OCTET_LENGTH],
+            NUMERIC_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_PRECISION],
+            NUMERIC_SCALE=row[INF_SCHEMA_COLUMNS_NAMES.NUMERIC_SCALE],
+            DATETIME_PRECISION=row[INF_SCHEMA_COLUMNS_NAMES.DATETIME_PRECISION],
+            CHARACTER_SET_NAME=row[INF_SCHEMA_COLUMNS_NAMES.CHARACTER_SET_NAME],
+            COLLATION_NAME=row[INF_SCHEMA_COLUMNS_NAMES.COLLATION_NAME],
+            COLUMN_TYPE=column_type,
+            ORIGINAL_TYPE=original_type
+        )
 
     def __post_init__(self):
-        # region check mandatory fields
+        """Check that all mandatory fields are filled.
+        """
         mandatory_fields = ['TABLE_SCHEMA', 'TABLE_NAME', 'COLUMN_NAME']
         if any(getattr(self, field_name) is None for field_name in mandatory_fields):
             raise ValueError('One of mandatory fields is missed when creating ColumnsTableRow')
-        # endregion
-
-        # region set default values depend on type
-        defaults = {
-            'COLUMN_TYPE': self.DATA_TYPE
-        }
-        if MYSQL_DATA_TYPE(self.DATA_TYPE) in (
-            MYSQL_DATA_TYPE.TIMESTAMP,
-            MYSQL_DATA_TYPE.DATETIME,
-            MYSQL_DATA_TYPE.DATE
-        ):
-            defaults = {
-                'DATETIME_PRECISION': 0,
-                'COLUMN_TYPE': self.DATA_TYPE
-            }
-        elif MYSQL_DATA_TYPE(self.DATA_TYPE) in (
-            MYSQL_DATA_TYPE.FLOAT,
-            MYSQL_DATA_TYPE.DOUBLE,
-            MYSQL_DATA_TYPE.DECIMAL
-        ):
-            defaults = {
-                'NUMERIC_PRECISION': 12,
-                'NUMERIC_SCALE': 0,
-                'COLUMN_TYPE': self.DATA_TYPE
-            }
-        elif MYSQL_DATA_TYPE(self.DATA_TYPE) in (
-            MYSQL_DATA_TYPE.TINYINT,
-            MYSQL_DATA_TYPE.SMALLINT,
-            MYSQL_DATA_TYPE.MEDIUMINT,
-            MYSQL_DATA_TYPE.INT,
-            MYSQL_DATA_TYPE.BIGINT
-        ):
-            defaults = {
-                'NUMERIC_PRECISION': 20,
-                'NUMERIC_SCALE': 0,
-                'COLUMN_TYPE': self.DATA_TYPE
-            }
-        elif MYSQL_DATA_TYPE(self.DATA_TYPE) is MYSQL_DATA_TYPE.VARCHAR:
-            defaults = {
-                'CHARACTER_MAXIMUM_LENGTH': 1024,
-                'CHARACTER_OCTET_LENGTH': 3072,
-                'CHARACTER_SET_NAME': 'utf8',
-                'COLLATION_NAME': 'utf8_bin',
-                'COLUMN_TYPE': 'varchar(1024)'
-            }
-        else:
-            # show as MYSQL_DATA_TYPE.TEXT:
-            defaults = {
-                'CHARACTER_MAXIMUM_LENGTH': 65535,  # from https://bugs.mysql.com/bug.php?id=90685
-                'CHARACTER_OCTET_LENGTH': 65535,
-                'CHARACTER_SET_NAME': 'utf8',
-                'COLLATION_NAME': 'utf8_bin',
-                'COLUMN_TYPE': 'text'
-            }
-
-        for key, value in defaults.items():
-            setattr(self, key, value)
-
-        self.DATA_TYPE = self.DATA_TYPE.lower()
-        self.COLUMN_TYPE = self.COLUMN_TYPE.lower()
-        # endregion
 
 
 class ColumnsTable(Table):
@@ -266,9 +298,7 @@ class ColumnsTable(Table):
     columns = [field.name for field in fields(ColumnsTableRow)]
 
     @classmethod
-    def get_data(cls, inf_schema=None, query: ASTNode = None, **kwargs):
-        result = []
-
+    def get_data(cls, inf_schema=None, query: ASTNode = None, **kwargs) -> pd.DataFrame:
         databases, tables_names = _get_scope(query)
 
         if databases is None:
@@ -278,6 +308,7 @@ class ColumnsTable(Table):
                 'files'
             ]
 
+        result = []
         for db_name in databases:
            tables = {}
            if db_name == 'information_schema':
@@ -293,43 +324,19 @@ class ColumnsTable(Table):
             if tables_names is None:
                 tables_names = [t.TABLE_NAME for t in dn.get_tables()]
             for table_name in tables_names:
-                tables[table_name] = dn.get_table_columns(table_name)
-
-            for table_name, table_columns in tables.items():
-                for i, column in enumerate(table_columns):
-                    column_name = column['name']
-                    column_type = column.get('type', 'text')
-
-                    # region infer type
-                    if isinstance(column_type, MYSQL_DATA_TYPE) is False:
-                        if column_type in ('double precision', 'real', 'numeric', 'float'):
-                            column_type = MYSQL_DATA_TYPE.FLOAT
-                        elif column_type in ('integer', 'smallint', 'int', 'bigint'):
-                            column_type = MYSQL_DATA_TYPE.BIGINT
-                        elif column_type in (
-                            'timestamp without time zone',
-                            'timestamp with time zone',
-                            'date', 'timestamp'
-                        ):
-                            column_type = MYSQL_DATA_TYPE.DATETIME
-                        else:
-                            column_type = MYSQL_DATA_TYPE.VARCHAR
-                    # endregion
-
-                    column_row = astuple(
-                        ColumnsTableRow(
-                            TABLE_SCHEMA=db_name,
-                            TABLE_NAME=table_name,
-                            COLUMN_NAME=column_name,
-                            DATA_TYPE=column_type.value,
-                            ORDINAL_POSITION=i
+                tables[table_name] = dn.get_table_columns_df(table_name)
+
+            for table_name, table_columns_df in tables.items():
+                for _, row in table_columns_df.iterrows():
+                    result.append(
+                        ColumnsTableRow.from_is_columns_row(
+                            table_schema=db_name,
+                            table_name=table_name,
+                            row=row
                        )
                    )
 
-                    result.append(column_row)
-
-        df = pd.DataFrame(result, columns=cls.columns)
-        return df
+        return pd.DataFrame(result, columns=cls.columns)
 
 
 class EventsTable(Table):
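
For reference, the new `infer_mysql_type` helper reduces to a small match-based lookup. A standalone sketch of the same rule (the Enum below is a stand-in for MindsDB's MYSQL_DATA_TYPE with only the members this rule needs; `match` requires Python 3.10+):

    from enum import Enum

    class MysqlType(Enum):  # stand-in for mindsdb's MYSQL_DATA_TYPE
        FLOAT = 'FLOAT'
        BIGINT = 'BIGINT'
        DATETIME = 'DATETIME'
        VARCHAR = 'VARCHAR'

    def infer_mysql_type(original_type: str) -> MysqlType:
        """Map a native type string to a coarse MySQL type; unknown types become VARCHAR."""
        match original_type.lower():
            case 'double precision' | 'real' | 'numeric' | 'float':
                return MysqlType.FLOAT
            case 'integer' | 'smallint' | 'int' | 'bigint':
                return MysqlType.BIGINT
            case 'timestamp without time zone' | 'timestamp with time zone' | 'date' | 'timestamp':
                return MysqlType.DATETIME
            case _:
                return MysqlType.VARCHAR

    assert infer_mysql_type('NUMERIC') is MysqlType.FLOAT
    assert infer_mysql_type('timestamp with time zone') is MysqlType.DATETIME
    assert infer_mysql_type('jsonb') is MysqlType.VARCHAR  # unknown types fall through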
--- a/mindsdb/api/executor/planner/query_plan.py
+++ b/mindsdb/api/executor/planner/query_plan.py
@@ -3,6 +3,7 @@ class QueryPlan:
     def __init__(self, steps=None, **kwargs):
         self.steps = []
         self.is_resumable = False
+        self.is_async = False
 
         if steps:
             for step in steps:
--- a/mindsdb/api/executor/planner/query_planner.py
+++ b/mindsdb/api/executor/planner/query_planner.py
@@ -656,9 +656,18 @@ class QueryPlanner:
             # plan sub-select first
             last_step = self.plan_select(query.from_select, integration=integration_name)
 
+            # possible knowledge base parameters
+            select = query.from_select
+            params = {}
+            if isinstance(select, Select) and select.using is not None:
+                for k, v in select.using.items():
+                    if k.startswith('kb_'):
+                        params[k] = v
+
             self.plan.add_step(InsertToTable(
                 table=table,
                 dataframe=last_step,
+                params=params,
             ))
         else:
             self.plan.add_step(InsertToTable(
@@ -762,7 +771,7 @@ class QueryPlanner:
         elif from_table is None:
             # one line select
             step = QueryStep(query, from_table=pd.DataFrame([None]))
-            self.plan.add_step(step)
+            return self.plan.add_step(step)
         else:
             raise PlanningException(f'Unsupported from_table {type(from_table)}')
 
@@ -838,6 +847,7 @@ class QueryPlanner:
         # handle fetchdataframe partitioning
         steps_out = []
 
+        step = None
         partition_step = None
         for step in plan.steps:
             if isinstance(step, FetchDataframeStep) and step.params is not None:
@@ -889,6 +899,10 @@ class QueryPlanner:
                 continue
 
             steps_out.append(step)
+
+        if plan.is_resumable and isinstance(step, InsertToTable):
+            plan.is_async = True
+
         plan.steps = steps_out
         return plan
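
The knowledge-base parameter hand-off above is just a prefix filter over the parsed USING clause. A minimal sketch (plain dicts stand in for `Select.using`; the `kb_batch_size` key is illustrative, not taken from the diff):

    def extract_kb_params(using: dict | None) -> dict:
        """Collect knowledge-base parameters (keys prefixed with 'kb_') from a USING clause."""
        if not using:
            return {}
        return {k: v for k, v in using.items() if k.startswith('kb_')}

    assert extract_kb_params({'kb_batch_size': 100, 'model': 'x'}) == {'kb_batch_size': 100}
    assert extract_kb_params(None) == {}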
--- a/mindsdb/api/executor/planner/steps.py
+++ b/mindsdb/api/executor/planner/steps.py
@@ -200,7 +200,7 @@ class MultipleSteps(PlanStep):
 
 
 class SaveToTable(PlanStep):
-    def __init__(self, table, dataframe, is_replace=False, *args, **kwargs):
+    def __init__(self, table, dataframe, is_replace=False, params=None, *args, **kwargs):
         """
         Creates table if not exists and fills it with content of dataframe
         is_replace - to drop table beforehand
@@ -209,15 +209,21 @@ class SaveToTable(PlanStep):
         self.table = table
         self.dataframe = dataframe
         self.is_replace = is_replace
+        if params is None:
+            params = {}
+        self.params = params
 
 
 class InsertToTable(PlanStep):
-    def __init__(self, table, dataframe=None, query=None, *args, **kwargs):
+    def __init__(self, table, dataframe=None, query=None, params=None, *args, **kwargs):
         """Fills table with content of dataframe"""
         super().__init__(*args, **kwargs)
         self.table = table
         self.dataframe = dataframe
         self.query = query
+        if params is None:
+            params = {}
+        self.params = params
 
 
 class CreateTableStep(PlanStep):
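
Both constructors take `params=None` and normalize to `{}` in the body rather than declaring `params={}` as the default. The reason is that Python evaluates default arguments once, at function definition time; a sketch of the pitfall being avoided:

    def bad(params={}):  # the default dict is created once and shared by every call
        params.setdefault('calls', 0)
        params['calls'] += 1
        return params

    def good(params=None):  # fresh dict per call, as in the diff above
        if params is None:
            params = {}
        params.setdefault('calls', 0)
        params['calls'] += 1
        return params

    assert bad() is bad()        # same object: state leaks across calls
    assert good() is not good()  # independent objects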
--- a/mindsdb/api/executor/sql_query/sql_query.py
+++ b/mindsdb/api/executor/sql_query/sql_query.py
@@ -12,7 +12,9 @@ import inspect
 from textwrap import dedent
 from typing import Union, Dict
 
+import pandas as pd
 from mindsdb_sql_parser import parse_sql, ASTNode
+
 from mindsdb.api.executor.planner.steps import (
     ApplyTimeseriesPredictorStep,
     ApplyPredictorRowStep,
@@ -47,9 +49,16 @@ class SQLQuery:
     step_handlers = {}
 
     def __init__(self, sql: Union[ASTNode, str], session, execute: bool = True,
-                 database: str = None, query_id: int = None):
+                 database: str = None, query_id: int = None, stop_event=None):
         self.session = session
 
+        self.query_id = query_id
+        if self.query_id is not None:
+            # get sql and database from resumed query
+            run_query = query_context_controller.get_query(self.query_id)
+            sql = run_query.sql
+            database = run_query.database
+
         if database is not None:
             self.database = database
         else:
@@ -69,12 +78,7 @@ class SQLQuery:
 
         self.outer_query = None
         self.run_query = None
-        self.query_id = query_id
-        if query_id is not None:
-            # resume query
-            run_query = query_context_controller.get_query(self.query_id)
-            run_query.clear_error()
-            sql = run_query.sql
+        self.stop_event = stop_event
 
         if isinstance(sql, str):
             self.query = parse_sql(sql)
@@ -240,7 +244,19 @@ class SQLQuery:
         if self.query_id is not None:
             self.run_query = query_context_controller.get_query(self.query_id)
         else:
-            self.run_query = query_context_controller.create_query(self.context['query_str'])
+            self.run_query = query_context_controller.create_query(self.context['query_str'], database=self.database)
+
+        if self.planner.plan.is_async and ctx.task_id is None:
+            # add to task
+            self.run_query.add_to_task()
+            # return query info
+            # columns in upper case
+            rec = {k.upper(): v for k, v in self.run_query.get_info().items()}
+            self.fetched_data = ResultSet().from_df(pd.DataFrame([rec]))
+            self.columns_list = self.fetched_data.columns
+            return
+        self.run_query.mark_as_run()
+
         ctx.run_query_id = self.run_query.record.id
 
         step_result = None
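
The async branch above returns a one-row description of the queued query instead of blocking. A sketch of just that response shape (the `get_info()` payload here is hypothetical; only the upper-casing and single-row DataFrame mirror the diff):

    import pandas as pd

    info = {'id': 1, 'sql': 'INSERT INTO kb SELECT ...', 'state': 'pending'}  # hypothetical payload

    # Column names are upper-cased before building the one-row result, as in the diff.
    rec = {k.upper(): v for k, v in info.items()}
    info_df = pd.DataFrame([rec])

    assert list(info_df.columns) == ['ID', 'SQL', 'STATE']
    assert len(info_df) == 1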
--- a/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py
+++ b/mindsdb/api/executor/sql_query/steps/apply_predictor_step.py
@@ -1,8 +1,8 @@
 import datetime as dt
 import re
 
-import dateinfer
 import pandas as pd
+import dateparser
 
 from mindsdb_sql_parser.ast import (
     BinaryOperation,
@@ -95,8 +95,8 @@ class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
         result = ResultSet()
         result.is_prediction = True
         if len(predictions) == 0:
-            columns = [col['name'] for col in project_datanode.get_table_columns(predictor_name)]
-            predictions = pd.DataFrame([], columns=columns)
+            columns_names = project_datanode.get_table_columns_names(predictor_name)
+            predictions = pd.DataFrame([], columns=columns_names)
 
         result.from_df(
             predictions,
@@ -173,10 +173,10 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
 
         project_datanode = self.session.datahub.get(project_name)
         if len(data) == 0:
-            cols = [col['name'] for col in project_datanode.get_table_columns(predictor_name)] + ['__mindsdb_row_id']
-            for col in cols:
+            columns_names = project_datanode.get_table_columns_names(predictor_name) + ['__mindsdb_row_id']
+            for column_name in columns_names:
                 result.add_column(Column(
-                    name=col,
+                    name=column_name,
                     database=table_name[0],
                     table_name=table_name[1],
                     table_alias=table_name[2]
@@ -262,7 +262,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             return predictor_data
 
         def get_date_format(samples):
-            # dateinfer reads sql date 2020-04-01 as yyyy-dd-mm; workaround for it
+            # Try common formats first with explicit patterns
             for date_format, pattern in (
                 ('%Y-%m-%d', r'[\d]{4}-[\d]{2}-[\d]{2}'),
                 ('%Y-%m-%d %H:%M:%S', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}'),
@@ -280,7 +280,24 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             if date_format is not None:
                 return date_format
 
-            return dateinfer.infer(samples)
+            # Use dateparser as a fallback
+            try:
+                # Check that the first sample is a parseable date
+                parsed_date = dateparser.parse(samples[0])
+                if parsed_date is None:
+                    raise ValueError("Could not parse date")
+
+                # Verify that all remaining samples are parseable too
+                for sample in samples[1:]:
+                    if dateparser.parse(sample) is None:
+                        raise ValueError("Inconsistent date formats in samples")
+                # Pick a strftime format based on the input's shape
+                if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
+                    return '%Y-%m-%d %H:%M:%S'
+                return '%Y-%m-%d'
+            except (ValueError, AttributeError):
+                # If dateparser fails, return a basic format as a last resort
+                return '%Y-%m-%d'
 
         model_types = predictor_metadata['model_types']
         if model_types.get(order_col) in ('float', 'integer'):
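
Replacing dateinfer with dateparser changes the inference strategy: explicit regexes decide the common SQL shapes first, and dateparser only validates samples that fall through. A condensed standalone sketch of that logic (not the exact function; requires the `dateparser` package):

    import re
    import dateparser

    def get_date_format(samples: list) -> str:
        # Explicit patterns win: they avoid the yyyy-dd-mm ambiguity of generic inference.
        for date_format, pattern in (
            ('%Y-%m-%d', r'^\d{4}-\d{2}-\d{2}$'),
            ('%Y-%m-%d %H:%M:%S', r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$'),
        ):
            if all(re.match(pattern, s) for s in samples):
                return date_format
        # Fallback: confirm the samples are parseable at all, then pick a shape
        # based on whether a time component is present.
        if all(dateparser.parse(s) is not None for s in samples):
            if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
                return '%Y-%m-%d %H:%M:%S'
        return '%Y-%m-%d'

    assert get_date_format(['2020-04-01', '2020-04-02']) == '%Y-%m-%d'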
--- a/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py
+++ b/mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py
@@ -182,7 +182,7 @@ class FetchDataframePartitionCall(BaseStepCall):
         thread_count = get_max_thread_count()
 
         # 3 tasks per worker during 1 batch
-        partition_size = int(run_query.batch_size / thread_count / 3)
+        partition_size = int(run_query.batch_size / thread_count)
         # min partition size
         if partition_size < 10:
             partition_size = 10
@@ -222,7 +222,9 @@ class FetchDataframePartitionCall(BaseStepCall):
                 else:
                     executor.shutdown()
                     raise e
-
+            if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
+                executor.shutdown()
+                raise RuntimeError('Query is interrupted')
             # TODO
             # 1. get next batch without updating track_value:
             #    it allows to keep queue_in filled with data between fetching batches
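
Two behaviors change in this file: the partition size is now batch_size divided by the worker count (previously also divided by 3), and a cooperative stop flag is checked between batches. A sketch of both under the assumption that `stop_event` carries a `threading.Event`-like object (the diff only shows `is_set()` being called on it):

    import threading

    def partition_size(batch_size: int, thread_count: int) -> int:
        # One task per worker per batch, floored at the minimum partition size of 10.
        return max(10, int(batch_size / thread_count))

    assert partition_size(1000, 8) == 125
    assert partition_size(40, 8) == 10  # minimum applies

    stop_event = threading.Event()
    # ...another thread may call stop_event.set() to interrupt the query...
    if stop_event is not None and stop_event.is_set():
        raise RuntimeError('Query is interrupted')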
--- a/mindsdb/api/executor/sql_query/steps/insert_step.py
+++ b/mindsdb/api/executor/sql_query/steps/insert_step.py
@@ -95,7 +95,8 @@ class InsertToTableCall(BaseStepCall):
                 table_name=table_name,
                 result_set=data,
                 is_replace=is_replace,
-                is_create=is_create
+                is_create=is_create,
+                params=step.params
             )
         return ResultSet(affected_rows=response.affected_rows)
 
--- a/mindsdb/api/executor/sql_query/steps/prepare_steps.py
+++ b/mindsdb/api/executor/sql_query/steps/prepare_steps.py
@@ -20,15 +20,14 @@ class GetPredictorColumnsCall(BaseStepCall):
     bind = GetPredictorColumns
 
     def call(self, step):
-
         mindsdb_database_name = config.get('default_project')
 
         predictor_name = step.predictor.parts[-1]
         dn = self.session.datahub.get(mindsdb_database_name)
-        columns = [col['name'] for col in dn.get_table_columns(predictor_name)]
+        columns_names = dn.get_table_columns_names(predictor_name)
 
         data = ResultSet()
-        for column_name in columns:
+        for column_name in columns_names:
             data.add_column(Column(
                 name=column_name,
                 table_name=predictor_name,
--- a/mindsdb/api/http/namespaces/config.py
+++ b/mindsdb/api/http/namespaces/config.py
@@ -27,33 +27,41 @@ class GetConfig(Resource):
     @api_endpoint_metrics('GET', '/config')
     def get(self):
         config = Config()
-        return {
+        resp = {
             'auth': {
                 'http_auth_enabled': config['auth']['http_auth_enabled']
             }
         }
+        for key in ['default_llm', 'default_embedding_model']:
+            value = config.get(key)
+            if value is not None:
+                resp[key] = value
+        return resp
 
     @ns_conf.doc('put_config')
     @api_endpoint_metrics('PUT', '/config')
     def put(self):
         data = request.json
 
-        unknown_argumens = list(set(data.keys()) - {'auth'})
-        if len(unknown_argumens) > 0:
+        allowed_arguments = {'auth', 'default_llm', 'default_embedding_model'}
+        unknown_arguments = list(set(data.keys()) - allowed_arguments)
+        if len(unknown_arguments) > 0:
             return http_error(
                 HTTPStatus.BAD_REQUEST, 'Wrong arguments',
-                f'Unknown argumens: {unknown_argumens}'
+                f'Unknown arguments: {unknown_arguments}'
             )
 
+        nested_keys_to_validate = {'auth'}
         for key in data.keys():
-            unknown_argumens = list(
-                set(data[key].keys()) - set(Config()[key].keys())
-            )
-            if len(unknown_argumens) > 0:
-                return http_error(
-                    HTTPStatus.BAD_REQUEST, 'Wrong arguments',
-                    f'Unknown argumens: {unknown_argumens}'
-                )
+            if key in nested_keys_to_validate:
+                unknown_arguments = list(
+                    set(data[key].keys()) - set(Config()[key].keys())
+                )
+                if len(unknown_arguments) > 0:
+                    return http_error(
+                        HTTPStatus.BAD_REQUEST, 'Wrong arguments',
+                        f'Unknown arguments: {unknown_arguments}'
+                    )
 
         Config().update(data)
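
The PUT handler now validates in two passes: top-level keys against an allow-list, then the nested keys of 'auth' against the current config. A sketch of that validation as a pure function (the dicts stand in for request.json and Config(); the 'gpt-4o' value is illustrative):

    def validate_config_update(data: dict, current: dict) -> list:
        """Return a list of error messages; an empty list means the update is acceptable."""
        allowed = {'auth', 'default_llm', 'default_embedding_model'}
        errors = []
        unknown = set(data) - allowed
        if unknown:
            errors.append(f'Unknown arguments: {sorted(unknown)}')
        # Only nested keys of 'auth' are validated against the current config.
        for key in data.keys() & {'auth'}:
            unknown = set(data[key]) - set(current.get(key, {}))
            if unknown:
                errors.append(f'Unknown arguments: {sorted(unknown)}')
        return errors

    current = {'auth': {'http_auth_enabled': False}}
    assert validate_config_update({'default_llm': 'gpt-4o'}, current) == []
    assert validate_config_update({'auth': {'bogus': True}}, current) == ["Unknown arguments: ['bogus']"]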