MindsDB 25.4.2.0__py3-none-any.whl → 25.4.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (30)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +29 -0
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +3 -2
  4. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +43 -1
  5. mindsdb/api/executor/planner/plan_join.py +1 -1
  6. mindsdb/api/executor/planner/query_plan.py +1 -0
  7. mindsdb/api/executor/planner/query_planner.py +86 -14
  8. mindsdb/api/executor/planner/steps.py +9 -1
  9. mindsdb/api/executor/sql_query/sql_query.py +37 -6
  10. mindsdb/api/executor/sql_query/steps/__init__.py +1 -0
  11. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +288 -0
  12. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +17 -16
  13. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -0
  14. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +7 -11
  15. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +28 -4
  16. mindsdb/integrations/libs/llm/config.py +11 -1
  17. mindsdb/integrations/libs/llm/utils.py +12 -0
  18. mindsdb/interfaces/agents/constants.py +12 -1
  19. mindsdb/interfaces/agents/langchain_agent.py +6 -0
  20. mindsdb/interfaces/knowledge_base/controller.py +128 -43
  21. mindsdb/interfaces/query_context/context_controller.py +221 -0
  22. mindsdb/interfaces/storage/db.py +23 -0
  23. mindsdb/migrations/versions/2025-03-21_fda503400e43_queries.py +45 -0
  24. mindsdb/utilities/context_executor.py +1 -1
  25. mindsdb/utilities/partitioning.py +35 -20
  26. {mindsdb-25.4.2.0.dist-info → mindsdb-25.4.2.1.dist-info}/METADATA +224 -222
  27. {mindsdb-25.4.2.0.dist-info → mindsdb-25.4.2.1.dist-info}/RECORD +30 -28
  28. {mindsdb-25.4.2.0.dist-info → mindsdb-25.4.2.1.dist-info}/WHEEL +0 -0
  29. {mindsdb-25.4.2.0.dist-info → mindsdb-25.4.2.1.dist-info}/licenses/LICENSE +0 -0
  30. {mindsdb-25.4.2.0.dist-info → mindsdb-25.4.2.1.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  __title__ = 'MindsDB'
2
2
  __package_name__ = 'mindsdb'
3
- __version__ = '25.4.2.0'
3
+ __version__ = '25.4.2.1'
4
4
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
5
5
  __email__ = "jorge@mindsdb.com"
6
6
  __author__ = 'MindsDB Inc'
@@ -34,6 +34,7 @@ from mindsdb_sql_parser.ast import (
34
34
  Update,
35
35
  Use,
36
36
  Tuple,
37
+ Function,
37
38
  )
38
39
 
39
40
  # typed models
@@ -599,6 +600,9 @@ class ExecuteCommands:
599
600
  ):
600
601
  return ExecuteAnswer()
601
602
  elif statement_type is Select:
603
+ ret = self.exec_service_function(statement, database_name)
604
+ if ret is not None:
605
+ return ret
602
606
  query = SQLQuery(statement, session=self.session, database=database_name)
603
607
  return self.answer_select(query)
604
608
  elif statement_type is Union:
@@ -648,6 +652,31 @@ class ExecuteCommands:
648
652
  logger.warning(f"Unknown SQL statement: {sql}")
649
653
  raise NotSupportedYet(f"Unknown SQL statement: {sql}")
650
654
 
655
+ def exec_service_function(self, statement: Select, database_name: str) -> Optional[ExecuteAnswer]:
656
+ """
657
+ If input query is a single line select without FROM
658
+ and has function in targets that matches with one of the mindsdb service functions:
659
+ - execute this function and return response
660
+ Otherwise, return None to allow the query execution to continue outside
661
+ """
662
+
663
+ if statement.from_table is not None or len(statement.targets) != 1:
664
+ return
665
+
666
+ target = statement.targets[0]
667
+ if not isinstance(target, Function):
668
+ return
669
+
670
+ command = target.op.lower()
671
+ args = [arg.value for arg in target.args if isinstance(arg, Constant)]
672
+ if command == 'query_resume':
673
+ ret = SQLQuery(None, session=self.session, database=database_name, query_id=args[0])
674
+ return self.answer_select(ret)
675
+
676
+ elif command == 'query_cancel':
677
+ query_context_controller.cancel_query(*args)
678
+ return ExecuteAnswer()
679
+
651
680
  def answer_create_trigger(self, statement, database_name):
652
681
  triggers_controller = TriggersController()
653
682
 
@@ -17,7 +17,7 @@ from .system_tables import (
17
17
  PluginsTable, EnginesTable, KeyColumnUsageTable, StatisticsTable,
18
18
  CharacterSetsTable, CollationsTable)
19
19
  from .mindsdb_tables import (
20
- ModelsTable, DatabasesTable, MLEnginesTable, HandlersTable, JobsTable,
20
+ ModelsTable, DatabasesTable, MLEnginesTable, HandlersTable, JobsTable, QueriesTable,
21
21
  ChatbotsTable, KBTable, SkillsTable, AgentsTable, ViewsTable, TriggersTable)
22
22
 
23
23
 
@@ -32,7 +32,8 @@ class InformationSchemaDataNode(DataNode):
32
32
  PluginsTable, EnginesTable, KeyColumnUsageTable, StatisticsTable,
33
33
  CharacterSetsTable, CollationsTable,
34
34
  ModelsTable, DatabasesTable, MLEnginesTable, HandlersTable, JobsTable,
35
- ChatbotsTable, KBTable, SkillsTable, AgentsTable, ViewsTable, TriggersTable
35
+ ChatbotsTable, KBTable, SkillsTable, AgentsTable, ViewsTable, TriggersTable,
36
+ QueriesTable
36
37
  ]
37
38
 
38
39
  def __init__(self, session):
@@ -9,6 +9,7 @@ from mindsdb.interfaces.jobs.jobs_controller import JobsController
9
9
  from mindsdb.interfaces.skills.skills_controller import SkillsController
10
10
  from mindsdb.interfaces.database.views import ViewController
11
11
  from mindsdb.interfaces.database.projects import ProjectController
12
+ from mindsdb.interfaces.query_context.context_controller import query_context_controller
12
13
 
13
14
  from mindsdb.api.executor.datahub.datanodes.system_tables import Table
14
15
 
@@ -326,7 +327,8 @@ class ChatbotsTable(MdbTable):
326
327
 
327
328
  class KBTable(MdbTable):
328
329
  name = 'KNOWLEDGE_BASES'
329
- columns = ["NAME", "PROJECT", "MODEL", "STORAGE", "PARAMS"]
330
+ columns = ["NAME", "PROJECT", "MODEL", "STORAGE", "PARAMS",
331
+ "INSERT_STARTED_AT", "INSERT_FINISHED_AT", "PROCESSED_ROWS", "ERROR", "QUERY_ID"]
330
332
 
331
333
  @classmethod
332
334
  def get_data(cls, query: ASTNode = None, inf_schema=None, **kwargs):
@@ -336,17 +338,36 @@ class KBTable(MdbTable):
336
338
  controller = KnowledgeBaseController(inf_schema.session)
337
339
  kb_list = controller.list(project_name)
338
340
 
341
+ # shouldn't be a lot of queries, we can fetch them all
342
+ queries_data = {
343
+ item['id']: item
344
+ for item in query_context_controller.list_queries()
345
+ }
346
+
339
347
  data = []
340
348
 
341
349
  for kb in kb_list:
342
350
  vector_database_name = kb['vector_database'] or ''
343
351
 
352
+ query_item = {}
353
+ query_id = kb['query_id']
354
+ if query_id is not None:
355
+ if query_id in queries_data:
356
+ query_item = queries_data.get(query_id)
357
+ else:
358
+ query_id = None
359
+
344
360
  data.append((
345
361
  kb['name'],
346
362
  kb['project_name'],
347
363
  kb['embedding_model'],
348
364
  vector_database_name + '.' + kb['vector_database_table'],
349
365
  to_json(kb['params']),
366
+ query_item.get('started_at'),
367
+ query_item.get('finished_at'),
368
+ query_item.get('processed_rows'),
369
+ query_item.get('error'),
370
+ query_id,
350
371
  ))
351
372
 
352
373
  return pd.DataFrame(data, columns=cls.columns)
@@ -426,3 +447,24 @@ class ViewsTable(MdbTable):
426
447
  data = [[row[k] for k in columns_lower] for row in data]
427
448
 
428
449
  return pd.DataFrame(data, columns=cls.columns)
450
+
451
+
452
+ class QueriesTable(MdbTable):
453
+ name = 'QUERIES'
454
+ columns = ["ID", "STARTED_AT", "FINISHED_AT", "PROCESSED_ROWS", "ERROR", "SQL", "PARAMETERS", "CONTEXT", "UPDATED_AT"]
455
+
456
+ @classmethod
457
+ def get_data(cls, **kwargs):
458
+ """
459
+ Returns all queries in progress or recently completed
460
+ Only queries marked as is_resumable by planner are stored in this table
461
+ :param kwargs:
462
+ :return:
463
+ """
464
+
465
+ data = query_context_controller.list_queries()
466
+ columns_lower = [col.lower() for col in cls.columns]
467
+
468
+ data = [[row[k] for k in columns_lower] for row in data]
469
+
470
+ return pd.DataFrame(data, columns=cls.columns)
@@ -423,7 +423,7 @@ class PlanJoinTablesQuery:
423
423
  else:
424
424
  query2.where = cond
425
425
 
426
- step = self.planner.get_integration_select_step(query2)
426
+ step = self.planner.get_integration_select_step(query2, params=query_in.using)
427
427
  self.tables_fetch_step[item.index] = step
428
428
 
429
429
  self.add_plan_step(step)
@@ -2,6 +2,7 @@
2
2
  class QueryPlan:
3
3
  def __init__(self, steps=None, **kwargs):
4
4
  self.steps = []
5
+ self.is_resumable = False
5
6
 
6
7
  if steps:
7
8
  for step in steps:
@@ -12,14 +12,13 @@ from mindsdb.api.executor.planner.exceptions import PlanningException
12
12
  from mindsdb.api.executor.planner import utils
13
13
  from mindsdb.api.executor.planner.query_plan import QueryPlan
14
14
  from mindsdb.api.executor.planner.steps import (
15
- FetchDataframeStep, ProjectStep, ApplyPredictorStep,
15
+ PlanStep, FetchDataframeStep, ProjectStep, ApplyPredictorStep,
16
16
  ApplyPredictorRowStep, UnionStep, GetPredictorColumns, SaveToTable,
17
- InsertToTable, UpdateToTable, SubSelectStep, QueryStep,
18
- DeleteStep, DataStep, CreateTableStep
17
+ InsertToTable, UpdateToTable, SubSelectStep, QueryStep, JoinStep,
18
+ DeleteStep, DataStep, CreateTableStep, FetchDataframeStepPartition
19
19
  )
20
20
  from mindsdb.api.executor.planner.utils import (
21
21
  disambiguate_predictor_column_identifier,
22
- get_deepest_select,
23
22
  recursively_extract_column_values,
24
23
  query_traversal, filters_to_bin_op
25
24
  )
@@ -166,7 +165,11 @@ class QueryPlanner:
166
165
 
167
166
  query_traversal(query, _prepare_integration_select)
168
167
 
169
- def get_integration_select_step(self, select):
168
+ def get_integration_select_step(self, select: Select, params: dict = None) -> PlanStep:
169
+ """
170
+ Generate planner step to execute query over integration or over results of previous step (if it is CTE)
171
+ """
172
+
170
173
  if isinstance(select.from_table, NativeQuery):
171
174
  integration_name = select.from_table.integration.parts[-1]
172
175
  else:
@@ -188,12 +191,22 @@ class QueryPlanner:
188
191
  if fetch_df_select.using is not None:
189
192
  fetch_df_select.using = None
190
193
 
191
- return FetchDataframeStep(integration=integration_name, query=fetch_df_select)
194
+ if params:
195
+ fetch_params = params.copy()
196
+ # remove partition parameters
197
+ for key in ('batch_size', 'track_column'):
198
+ if key in params:
199
+ del params[key]
200
+ if 'track_column' in fetch_params and isinstance(fetch_params['track_column'], Identifier):
201
+ fetch_params['track_column'] = fetch_params['track_column'].parts[-1]
202
+ else:
203
+ fetch_params = None
204
+ return FetchDataframeStep(integration=integration_name, query=fetch_df_select, params=fetch_params)
192
205
 
193
206
  def plan_integration_select(self, select):
194
207
  """Plan for a select query that can be fully executed in an integration"""
195
208
 
196
- return self.plan.add_step(self.get_integration_select_step(select))
209
+ return self.plan.add_step(self.get_integration_select_step(select, params=select.using))
197
210
 
198
211
  def resolve_database_table(self, node: Identifier):
199
212
  # resolves integration name and table name
@@ -414,12 +427,6 @@ class QueryPlanner:
414
427
 
415
428
  return self.plan_mdb_nested_select(select)
416
429
 
417
- def plan_integration_nested_select(self, select, integration_name):
418
- fetch_df_select = copy.deepcopy(select)
419
- deepest_select = get_deepest_select(fetch_df_select)
420
- self.prepare_integration_select(integration_name, deepest_select)
421
- return self.plan.add_step(FetchDataframeStep(integration=integration_name, query=fetch_df_select))
422
-
423
430
  def plan_mdb_nested_select(self, select):
424
431
  # plan nested select
425
432
 
@@ -818,7 +825,72 @@ class QueryPlanner:
818
825
  else:
819
826
  raise PlanningException(f'Unsupported query type {type(query)}')
820
827
 
821
- return self.plan
828
+ plan = self.handle_partitioning(self.plan)
829
+
830
+ return plan
831
+
832
+ def handle_partitioning(self, plan: QueryPlan) -> QueryPlan:
833
+ """
834
+ If plan has fetching in partitions:
835
+ try to rebuild the plan to send each fetched chunk of data through the following steps, where possible
836
+ """
837
+
838
+ # handle fetchdataframe partitioning
839
+ steps_out = []
840
+
841
+ partition_step = None
842
+ for step in plan.steps:
843
+ if isinstance(step, FetchDataframeStep) and step.params is not None:
844
+ batch_size = step.params.get('batch_size')
845
+ if batch_size is not None:
846
+ # found batched fetch
847
+ partition_step = FetchDataframeStepPartition(
848
+ step_num=step.step_num,
849
+ integration=step.integration,
850
+ query=step.query,
851
+ raw_query=step.raw_query,
852
+ params=step.params
853
+ )
854
+ steps_out.append(partition_step)
855
+ # mark plan
856
+ plan.is_resumable = True
857
+ continue
858
+ else:
859
+ step.params = None
860
+
861
+ if partition_step is not None:
862
+ # check and add step into partition
863
+
864
+ can_be_partitioned = False
865
+ if isinstance(step, (JoinStep, ApplyPredictorStep, InsertToTable)):
866
+ can_be_partitioned = True
867
+ elif isinstance(step, QueryStep):
868
+ query = step.query
869
+ if (
870
+ query.group_by is None and query.order_by is None and query.distinct is False
871
+ and query.limit is None and query.offset is None
872
+ ):
873
+ no_identifiers = [
874
+ target
875
+ for target in step.query.targets
876
+ if not isinstance(target, (Star, Identifier))
877
+ ]
878
+ if len(no_identifiers) == 0:
879
+ can_be_partitioned = True
880
+
881
+ if not can_be_partitioned:
882
+ if len(partition_step.steps) == 0:
883
+ # Nothing can be partitioned, fall back to the old plan
884
+ plan.is_resumable = False
885
+ return plan
886
+ partition_step = None
887
+ else:
888
+ partition_step.steps.append(step)
889
+ continue
890
+
891
+ steps_out.append(step)
892
+ plan.steps = steps_out
893
+ return plan
822
894
 
823
895
  def prepare_steps(self, query):
824
896
  statement_planner = PreparedStatementPlanner(self)
@@ -104,11 +104,19 @@ class LimitOffsetStep(PlanStep):
104
104
 
105
105
  class FetchDataframeStep(PlanStep):
106
106
  """Fetches a dataframe from external integration"""
107
- def __init__(self, integration, query=None, raw_query=None, *args, **kwargs):
107
+ def __init__(self, integration, query=None, raw_query=None, params=None, *args, **kwargs):
108
108
  super().__init__(*args, **kwargs)
109
109
  self.integration = integration
110
110
  self.query = query
111
111
  self.raw_query = raw_query
112
+ self.params = params
113
+
114
+
115
+ class FetchDataframeStepPartition(FetchDataframeStep):
116
+ """Fetches a dataframe from external integration in partitions"""
117
+ def __init__(self, *args, **kwargs):
118
+ super().__init__(*args, **kwargs)
119
+ self.steps = []
112
120
 
113
121
 
114
122
  class ApplyPredictorStep(PlanStep):
@@ -10,9 +10,9 @@
10
10
  """
11
11
  import inspect
12
12
  from textwrap import dedent
13
- from typing import Dict
13
+ from typing import Union, Dict
14
14
 
15
- from mindsdb_sql_parser import parse_sql
15
+ from mindsdb_sql_parser import parse_sql, ASTNode
16
16
  from mindsdb.api.executor.planner.steps import (
17
17
  ApplyTimeseriesPredictorStep,
18
18
  ApplyPredictorRowStep,
@@ -33,6 +33,9 @@ from mindsdb.api.executor.exceptions import (
33
33
  import mindsdb.utilities.profiler as profiler
34
34
  from mindsdb.utilities.fs import create_process_mark, delete_process_mark
35
35
  from mindsdb.utilities.exception import EntityNotExistsError
36
+ from mindsdb.interfaces.query_context.context_controller import query_context_controller
37
+ from mindsdb.utilities.context import context as ctx
38
+
36
39
 
37
40
  from . import steps
38
41
  from .result_set import ResultSet, Column
@@ -43,7 +46,8 @@ class SQLQuery:
43
46
 
44
47
  step_handlers = {}
45
48
 
46
- def __init__(self, sql, session, execute=True, database=None):
49
+ def __init__(self, sql: Union[ASTNode, str], session, execute: bool = True,
50
+ database: str = None, query_id: int = None):
47
51
  self.session = session
48
52
 
49
53
  if database is not None:
@@ -63,6 +67,15 @@ class SQLQuery:
63
67
  self.parameters = []
64
68
  self.fetched_data: ResultSet = None
65
69
 
70
+ self.outer_query = None
71
+ self.run_query = None
72
+ self.query_id = query_id
73
+ if query_id is not None:
74
+ # resume query
75
+ run_query = query_context_controller.get_query(self.query_id)
76
+ run_query.clear_error()
77
+ sql = run_query.sql
78
+
66
79
  if isinstance(sql, str):
67
80
  self.query = parse_sql(sql)
68
81
  self.context['query_str'] = sql
@@ -217,10 +230,22 @@ class SQLQuery:
217
230
  # no need to execute
218
231
  return
219
232
 
233
+ try:
234
+ steps = list(self.planner.execute_steps())
235
+ except PlanningException as e:
236
+ raise LogicError(e)
237
+
238
+ if self.planner.plan.is_resumable:
239
+ # create query
240
+ if self.query_id is not None:
241
+ self.run_query = query_context_controller.get_query(self.query_id)
242
+ else:
243
+ self.run_query = query_context_controller.create_query(self.context['query_str'])
244
+ ctx.run_query_id = self.run_query.record.id
245
+
220
246
  step_result = None
221
247
  process_mark = None
222
248
  try:
223
- steps = list(self.planner.execute_steps())
224
249
  steps_classes = (x.__class__ for x in steps)
225
250
  predict_steps = (ApplyPredictorRowStep, ApplyPredictorStep, ApplyTimeseriesPredictorStep)
226
251
  if any(s in predict_steps for s in steps_classes):
@@ -229,10 +254,16 @@ class SQLQuery:
229
254
  with profiler.Context(f'step: {step.__class__.__name__}'):
230
255
  step_result = self.execute_step(step)
231
256
  self.steps_data[step.step_num] = step_result
232
- except PlanningException as e:
233
- raise LogicError(e)
234
257
  except Exception as e:
258
+ if self.run_query is not None:
259
+ # set error and place where it stopped
260
+ self.run_query.on_error(e, step.step_num, self.steps_data)
235
261
  raise e
262
+ else:
263
+ # mark running query as completed
264
+ if self.run_query is not None:
265
+ self.run_query.finish()
266
+ ctx.run_query_id = None
236
267
  finally:
237
268
  if process_mark is not None:
238
269
  delete_process_mark('predict', process_mark)
@@ -1,6 +1,7 @@
1
1
  from .apply_predictor_step import ApplyPredictorStepCall, ApplyPredictorRowStepCall, ApplyTimeseriesPredictorStepCall
2
2
  from .delete_step import DeleteStepCall
3
3
  from .fetch_dataframe import FetchDataframeStepCall
4
+ from .fetch_dataframe_partition import FetchDataframePartitionCall
4
5
  from .insert_step import InsertToTableCall, SaveToTableCall, CreateTableCall
5
6
  from .join_step import JoinStepCall
6
7
  from .map_reduce_step import MapReduceStepCall