MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +53 -94
- mindsdb/api/a2a/agent.py +30 -206
- mindsdb/api/a2a/common/server/server.py +26 -27
- mindsdb/api/a2a/task_manager.py +93 -227
- mindsdb/api/a2a/utils.py +21 -0
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/executor/utilities/sql.py +97 -21
- mindsdb/api/http/namespaces/agents.py +126 -201
- mindsdb/api/http/namespaces/config.py +12 -1
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +244 -141
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +3 -2
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +1 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/keyword_search_base.py +41 -0
- mindsdb/integrations/libs/vectordatabase_handler.py +114 -84
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/integrations/utilities/sql_utils.py +11 -0
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/langchain_agent.py +7 -5
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
- mindsdb/interfaces/database/projects.py +1 -3
- mindsdb/interfaces/functions/controller.py +54 -64
- mindsdb/interfaces/functions/to_markdown.py +47 -14
- mindsdb/interfaces/knowledge_base/controller.py +228 -110
- mindsdb/interfaces/knowledge_base/evaluate.py +18 -6
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +58 -40
- mindsdb/utilities/exception.py +58 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/METADATA +245 -259
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/RECORD +61 -58
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/top_level.txt +0 -0
|
@@ -8,7 +8,7 @@ from mindsdb.api.executor.planner import utils
|
|
|
8
8
|
|
|
9
9
|
def to_string(identifier):
|
|
10
10
|
# alternative to AST.to_string() but without quoting
|
|
11
|
-
return
|
|
11
|
+
return ".".join(identifier.parts)
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class Table:
|
|
@@ -32,7 +32,6 @@ class Column:
|
|
|
32
32
|
def __init__(self, node=None, table=None, name=None, type=None):
|
|
33
33
|
alias = None
|
|
34
34
|
if node is not None:
|
|
35
|
-
|
|
36
35
|
if isinstance(node, ast.Identifier):
|
|
37
36
|
# set name
|
|
38
37
|
name = node.parts[-1] # ???
|
|
@@ -67,26 +66,25 @@ class Statement:
|
|
|
67
66
|
self.offset = 0
|
|
68
67
|
|
|
69
68
|
|
|
70
|
-
class PreparedStatementPlanner
|
|
71
|
-
|
|
69
|
+
class PreparedStatementPlanner:
|
|
72
70
|
def __init__(self, planner):
|
|
73
71
|
self.planner = planner
|
|
74
72
|
|
|
75
73
|
def get_type_of_var(self, v):
|
|
76
74
|
if isinstance(v, str):
|
|
77
|
-
return
|
|
75
|
+
return "str"
|
|
78
76
|
elif isinstance(v, float):
|
|
79
|
-
return
|
|
77
|
+
return "float"
|
|
80
78
|
elif isinstance(v, int):
|
|
81
|
-
return
|
|
79
|
+
return "integer"
|
|
82
80
|
|
|
83
|
-
return
|
|
81
|
+
return "str"
|
|
84
82
|
|
|
85
83
|
def get_statement_info(self):
|
|
86
84
|
stmt = self.planner.statement
|
|
87
85
|
|
|
88
86
|
if stmt is None:
|
|
89
|
-
raise PlanningException(
|
|
87
|
+
raise PlanningException("Statement is not prepared")
|
|
90
88
|
|
|
91
89
|
columns_result = []
|
|
92
90
|
|
|
@@ -95,45 +93,45 @@ class PreparedStatementPlanner():
|
|
|
95
93
|
if column.table is not None:
|
|
96
94
|
table = column.table.name
|
|
97
95
|
ds = column.table.ds
|
|
98
|
-
columns_result.append(
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
96
|
+
columns_result.append(
|
|
97
|
+
dict(
|
|
98
|
+
alias=column.alias,
|
|
99
|
+
type=column.type,
|
|
100
|
+
name=column.name,
|
|
101
|
+
table_name=table,
|
|
102
|
+
table_alias=table,
|
|
103
|
+
ds=ds,
|
|
104
|
+
)
|
|
105
|
+
)
|
|
106
106
|
|
|
107
107
|
parameters = []
|
|
108
108
|
for param in stmt.params:
|
|
109
|
-
name =
|
|
110
|
-
parameters.append(
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
'parameters': parameters,
|
|
118
|
-
'columns': columns_result
|
|
119
|
-
}
|
|
109
|
+
name = "?"
|
|
110
|
+
parameters.append(
|
|
111
|
+
dict(
|
|
112
|
+
alias=name,
|
|
113
|
+
type="str",
|
|
114
|
+
name=name,
|
|
115
|
+
)
|
|
116
|
+
)
|
|
120
117
|
|
|
121
|
-
|
|
118
|
+
return {"parameters": parameters, "columns": columns_result}
|
|
122
119
|
|
|
120
|
+
def get_table_of_column(self, t):
|
|
123
121
|
tables_map = self.planner.statement.tables_map
|
|
124
122
|
|
|
125
123
|
# get tables to check
|
|
126
124
|
if len(t.parts) > 1:
|
|
127
125
|
# try to find table
|
|
128
126
|
table_parts = t.parts[:-1]
|
|
129
|
-
table_name =
|
|
127
|
+
table_name = ".".join(table_parts)
|
|
130
128
|
if table_name in tables_map:
|
|
131
129
|
return tables_map[table_name]
|
|
132
130
|
|
|
133
131
|
elif len(table_parts) > 1:
|
|
134
132
|
# maybe datasource is 1st part
|
|
135
133
|
table_parts = table_parts[1:]
|
|
136
|
-
table_name =
|
|
134
|
+
table_name = ".".join(table_parts)
|
|
137
135
|
if table_name in tables_map:
|
|
138
136
|
return tables_map[table_name]
|
|
139
137
|
|
|
@@ -158,14 +156,10 @@ class PreparedStatementPlanner():
|
|
|
158
156
|
# in reverse order
|
|
159
157
|
for p in table.parts[::-1]:
|
|
160
158
|
parts.insert(0, p)
|
|
161
|
-
keys.append(
|
|
159
|
+
keys.append(".".join(parts))
|
|
162
160
|
|
|
163
161
|
# remember table
|
|
164
|
-
tbl = Table(
|
|
165
|
-
ds=ds,
|
|
166
|
-
node=table,
|
|
167
|
-
is_predictor=is_predictor
|
|
168
|
-
)
|
|
162
|
+
tbl = Table(ds=ds, node=table, is_predictor=is_predictor)
|
|
169
163
|
tbl.keys = keys
|
|
170
164
|
|
|
171
165
|
return tbl
|
|
@@ -189,7 +183,6 @@ class PreparedStatementPlanner():
|
|
|
189
183
|
stmt.tables_map = {}
|
|
190
184
|
stmt.tables_lvl1 = []
|
|
191
185
|
if query.from_table is not None:
|
|
192
|
-
|
|
193
186
|
if isinstance(query.from_table, ast.Join):
|
|
194
187
|
# get all tables
|
|
195
188
|
join_tables = utils.convert_join_to_list(query.from_table)
|
|
@@ -198,21 +191,17 @@ class PreparedStatementPlanner():
|
|
|
198
191
|
|
|
199
192
|
if isinstance(query.from_table, ast.Select):
|
|
200
193
|
# nested select, get only last select
|
|
201
|
-
join_tables = [
|
|
202
|
-
dict(
|
|
203
|
-
table=utils.get_deepest_select(query.from_table).from_table
|
|
204
|
-
)
|
|
205
|
-
]
|
|
194
|
+
join_tables = [dict(table=utils.get_deepest_select(query.from_table).from_table)]
|
|
206
195
|
|
|
207
196
|
for i, join_table in enumerate(join_tables):
|
|
208
|
-
table = join_table[
|
|
197
|
+
table = join_table["table"]
|
|
209
198
|
if isinstance(table, ast.Identifier):
|
|
210
199
|
tbl = self.table_from_identifier(table)
|
|
211
200
|
|
|
212
201
|
if tbl.is_predictor:
|
|
213
202
|
# Is the last table?
|
|
214
203
|
if i + 1 < len(join_tables):
|
|
215
|
-
raise PlanningException(
|
|
204
|
+
raise PlanningException("Predictor must be last table in query")
|
|
216
205
|
|
|
217
206
|
stmt.tables_lvl1.append(tbl)
|
|
218
207
|
for key in tbl.keys:
|
|
@@ -225,13 +214,12 @@ class PreparedStatementPlanner():
|
|
|
225
214
|
# is there any predictors at other levels?
|
|
226
215
|
lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
|
|
227
216
|
if len(query_predictors) != len(lvl1_predictors):
|
|
228
|
-
raise PlanningException(
|
|
217
|
+
raise PlanningException("Predictor is not at first level")
|
|
229
218
|
|
|
230
219
|
# === get targets ===
|
|
231
220
|
columns = []
|
|
232
221
|
get_all_tables = False
|
|
233
222
|
for t in query.targets:
|
|
234
|
-
|
|
235
223
|
column = Column(t)
|
|
236
224
|
|
|
237
225
|
# column alias
|
|
@@ -264,10 +252,10 @@ class PreparedStatementPlanner():
|
|
|
264
252
|
column.type = self.get_type_of_var(t.value)
|
|
265
253
|
elif isinstance(t, ast.Function):
|
|
266
254
|
# mysql function
|
|
267
|
-
if t.op ==
|
|
268
|
-
column.type =
|
|
255
|
+
if t.op == "connection_id":
|
|
256
|
+
column.type = "integer"
|
|
269
257
|
else:
|
|
270
|
-
column.type =
|
|
258
|
+
column.type = "str"
|
|
271
259
|
else:
|
|
272
260
|
# TODO go down into lower level.
|
|
273
261
|
# It can be function, operation, select.
|
|
@@ -276,7 +264,7 @@ class PreparedStatementPlanner():
|
|
|
276
264
|
# TODO add several known types for function, i.e ABS-int
|
|
277
265
|
|
|
278
266
|
# TODO TypeCast - as casted type
|
|
279
|
-
column.type =
|
|
267
|
+
column.type = "str"
|
|
280
268
|
|
|
281
269
|
if alias is not None:
|
|
282
270
|
column.alias = alias
|
|
@@ -299,28 +287,25 @@ class PreparedStatementPlanner():
|
|
|
299
287
|
if step.result_data is not None:
|
|
300
288
|
# save results
|
|
301
289
|
|
|
302
|
-
if len(step.result_data[
|
|
303
|
-
table_info = step.result_data[
|
|
304
|
-
columns_info = step.result_data[
|
|
290
|
+
if len(step.result_data["tables"]) > 0:
|
|
291
|
+
table_info = step.result_data["tables"][0]
|
|
292
|
+
columns_info = step.result_data["columns"][table_info]
|
|
305
293
|
|
|
306
294
|
table.columns = []
|
|
307
295
|
table.ds = table_info[0]
|
|
308
296
|
for col in columns_info:
|
|
309
297
|
if isinstance(col, tuple):
|
|
310
298
|
# is predictor
|
|
311
|
-
col = dict(name=col[0], type=
|
|
299
|
+
col = dict(name=col[0], type="str")
|
|
312
300
|
table.columns.append(
|
|
313
301
|
Column(
|
|
314
|
-
name=col[
|
|
315
|
-
type=col[
|
|
302
|
+
name=col["name"],
|
|
303
|
+
type=col["type"],
|
|
316
304
|
)
|
|
317
305
|
)
|
|
318
306
|
|
|
319
307
|
# map by names
|
|
320
|
-
table.columns_map = {
|
|
321
|
-
i.name.upper(): i
|
|
322
|
-
for i in table.columns
|
|
323
|
-
}
|
|
308
|
+
table.columns_map = {i.name.upper(): i for i in table.columns}
|
|
324
309
|
|
|
325
310
|
# === create columns list ===
|
|
326
311
|
columns_result = []
|
|
@@ -329,7 +314,7 @@ class PreparedStatementPlanner():
|
|
|
329
314
|
# add data from all tables
|
|
330
315
|
for table in stmt.tables_lvl1:
|
|
331
316
|
if table.columns is None:
|
|
332
|
-
raise PlanningException(f
|
|
317
|
+
raise PlanningException(f"Table is not found {table.name}")
|
|
333
318
|
|
|
334
319
|
for col in table.columns:
|
|
335
320
|
# col = {name: 'col', type: 'str'}
|
|
@@ -354,7 +339,7 @@ class PreparedStatementPlanner():
|
|
|
354
339
|
column.type = table.columns_map[col_name].type
|
|
355
340
|
else:
|
|
356
341
|
# continue
|
|
357
|
-
raise PlanningException(f
|
|
342
|
+
raise PlanningException(f"Column not found {col_name}")
|
|
358
343
|
|
|
359
344
|
else:
|
|
360
345
|
# table is not found, looking for in all tables
|
|
@@ -368,11 +353,11 @@ class PreparedStatementPlanner():
|
|
|
368
353
|
|
|
369
354
|
# forcing alias
|
|
370
355
|
if column.alias is None:
|
|
371
|
-
column.alias = f
|
|
356
|
+
column.alias = f"column_{i}"
|
|
372
357
|
|
|
373
358
|
# forcing type
|
|
374
359
|
if column.type is None:
|
|
375
|
-
column.type =
|
|
360
|
+
column.type = "str"
|
|
376
361
|
|
|
377
362
|
columns_result.append(column)
|
|
378
363
|
|
|
@@ -393,28 +378,25 @@ class PreparedStatementPlanner():
|
|
|
393
378
|
if step.result_data is not None:
|
|
394
379
|
# save results
|
|
395
380
|
|
|
396
|
-
if len(step.result_data[
|
|
397
|
-
table_info = step.result_data[
|
|
398
|
-
columns_info = step.result_data[
|
|
381
|
+
if len(step.result_data["tables"]) > 0:
|
|
382
|
+
table_info = step.result_data["tables"][0]
|
|
383
|
+
columns_info = step.result_data["columns"][table_info]
|
|
399
384
|
|
|
400
385
|
table.columns = []
|
|
401
386
|
table.ds = table_info[0]
|
|
402
387
|
for col in columns_info:
|
|
403
388
|
if isinstance(col, tuple):
|
|
404
389
|
# is predictor
|
|
405
|
-
col = dict(name=col[0], type=
|
|
390
|
+
col = dict(name=col[0], type="str")
|
|
406
391
|
table.columns.append(
|
|
407
392
|
Column(
|
|
408
|
-
name=col[
|
|
409
|
-
type=col[
|
|
393
|
+
name=col["name"],
|
|
394
|
+
type=col["type"],
|
|
410
395
|
)
|
|
411
396
|
)
|
|
412
397
|
|
|
413
398
|
# map by names
|
|
414
|
-
table.columns_map = {
|
|
415
|
-
i.name.upper(): i
|
|
416
|
-
for i in table.columns
|
|
417
|
-
}
|
|
399
|
+
table.columns_map = {i.name.upper(): i for i in table.columns}
|
|
418
400
|
|
|
419
401
|
# save results
|
|
420
402
|
columns_result = []
|
|
@@ -430,7 +412,7 @@ class PreparedStatementPlanner():
|
|
|
430
412
|
|
|
431
413
|
if column.type is None:
|
|
432
414
|
# forcing type
|
|
433
|
-
column.type =
|
|
415
|
+
column.type = "str"
|
|
434
416
|
|
|
435
417
|
columns_result.append(column)
|
|
436
418
|
|
|
@@ -440,13 +422,12 @@ class PreparedStatementPlanner():
|
|
|
440
422
|
stmt = self.planner.statement
|
|
441
423
|
|
|
442
424
|
stmt.columns = [
|
|
443
|
-
Column(name=
|
|
444
|
-
Column(name=
|
|
425
|
+
Column(name="Variable_name", type="str"),
|
|
426
|
+
Column(name="Value", type="str"),
|
|
445
427
|
]
|
|
446
428
|
return []
|
|
447
429
|
|
|
448
430
|
def prepare_steps(self, query):
|
|
449
|
-
|
|
450
431
|
stmt = Statement()
|
|
451
432
|
self.planner.statement = stmt
|
|
452
433
|
|
|
@@ -476,7 +457,6 @@ class PreparedStatementPlanner():
|
|
|
476
457
|
if isinstance(query, ast.Show):
|
|
477
458
|
return self.prepare_show(query)
|
|
478
459
|
else:
|
|
479
|
-
|
|
480
460
|
# do nothing
|
|
481
461
|
return []
|
|
482
462
|
# raise NotImplementedError(query.__name__)
|
|
@@ -496,7 +476,6 @@ class PreparedStatementPlanner():
|
|
|
496
476
|
query = self.planner.query
|
|
497
477
|
|
|
498
478
|
if params is not None:
|
|
499
|
-
|
|
500
479
|
if len(params) != len(stmt.params):
|
|
501
480
|
raise PlanningException("Count of execution parameters don't match prepared statement")
|
|
502
481
|
|
|
@@ -508,12 +487,14 @@ class PreparedStatementPlanner():
|
|
|
508
487
|
stmt.params = None
|
|
509
488
|
|
|
510
489
|
if (
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
490
|
+
isinstance(query, ast.Select)
|
|
491
|
+
or isinstance(query, ast.Union)
|
|
492
|
+
or isinstance(query, ast.CreateTable)
|
|
493
|
+
or isinstance(query, ast.Insert)
|
|
494
|
+
or isinstance(query, ast.Update)
|
|
495
|
+
or isinstance(query, ast.Delete)
|
|
496
|
+
or isinstance(query, ast.Intersect)
|
|
497
|
+
or isinstance(query, ast.Except)
|
|
517
498
|
):
|
|
518
499
|
return self.plan_query(query)
|
|
519
500
|
else:
|
|
@@ -6,6 +6,8 @@ from mindsdb_sql_parser.ast import (
|
|
|
6
6
|
Parameter,
|
|
7
7
|
BinaryOperation,
|
|
8
8
|
Tuple,
|
|
9
|
+
Union,
|
|
10
|
+
Intersect,
|
|
9
11
|
)
|
|
10
12
|
|
|
11
13
|
from mindsdb.api.executor.planner.steps import FetchDataframeStep
|
|
@@ -92,7 +94,10 @@ class FetchDataframeStepCall(BaseStepCall):
|
|
|
92
94
|
response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
|
|
93
95
|
df = response.data_frame
|
|
94
96
|
else:
|
|
95
|
-
|
|
97
|
+
if isinstance(step.query, (Union, Intersect)):
|
|
98
|
+
table_alias = ["", "", ""]
|
|
99
|
+
else:
|
|
100
|
+
table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
|
|
96
101
|
|
|
97
102
|
# TODO for information_schema we have 'database' = 'mindsdb'
|
|
98
103
|
|
|
@@ -9,7 +9,6 @@ from .base import BaseStepCall
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class UnionStepCall(BaseStepCall):
|
|
12
|
-
|
|
13
12
|
bind = UnionStep
|
|
14
13
|
|
|
15
14
|
def call(self, step):
|
|
@@ -19,7 +18,8 @@ class UnionStepCall(BaseStepCall):
|
|
|
19
18
|
# count of columns have to match
|
|
20
19
|
if len(left_result.columns) != len(right_result.columns):
|
|
21
20
|
raise WrongArgumentError(
|
|
22
|
-
f
|
|
21
|
+
f"UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} "
|
|
22
|
+
)
|
|
23
23
|
|
|
24
24
|
# types have to match
|
|
25
25
|
# TODO: return checking type later
|
|
@@ -33,19 +33,21 @@ class UnionStepCall(BaseStepCall):
|
|
|
33
33
|
table_a, names = left_result.to_df_cols()
|
|
34
34
|
table_b, _ = right_result.to_df_cols()
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
if step.operation.lower() == "intersect":
|
|
37
|
+
op = "INTERSECT"
|
|
38
|
+
else:
|
|
39
|
+
op = "UNION"
|
|
40
|
+
|
|
41
|
+
if step.unique is not True:
|
|
42
|
+
op += " ALL"
|
|
43
|
+
|
|
39
44
|
query = f"""
|
|
40
45
|
SELECT * FROM table_a
|
|
41
46
|
{op}
|
|
42
47
|
SELECT * FROM table_b
|
|
43
48
|
"""
|
|
44
49
|
|
|
45
|
-
resp_df, _description = query_df_with_type_infer_fallback(query, {
|
|
46
|
-
'table_a': table_a,
|
|
47
|
-
'table_b': table_b
|
|
48
|
-
})
|
|
50
|
+
resp_df, _description = query_df_with_type_infer_fallback(query, {"table_a": table_a, "table_b": table_b})
|
|
49
51
|
resp_df.replace({np.nan: None}, inplace=True)
|
|
50
52
|
|
|
51
53
|
return ResultSet.from_df_cols(df=resp_df, columns_dict=names)
|
|
@@ -6,13 +6,14 @@ from duckdb import InvalidInputException
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
|
|
8
8
|
from mindsdb_sql_parser import parse_sql
|
|
9
|
-
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
|
|
10
|
-
from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
11
9
|
from mindsdb_sql_parser.ast import ASTNode, Select, Identifier, Function, Constant
|
|
12
|
-
from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier
|
|
13
10
|
|
|
11
|
+
from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
14
12
|
from mindsdb.utilities import log
|
|
13
|
+
from mindsdb.utilities.exception import format_db_error_message
|
|
14
|
+
from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier
|
|
15
15
|
from mindsdb.utilities.json_encoder import CustomJSONEncoder
|
|
16
|
+
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
|
|
16
17
|
|
|
17
18
|
logger = log.getLogger(__name__)
|
|
18
19
|
|
|
@@ -64,29 +65,85 @@ def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_fun
|
|
|
64
65
|
pandas.columns
|
|
65
66
|
"""
|
|
66
67
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
user_functions
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
68
|
+
try:
|
|
69
|
+
with duckdb.connect(database=":memory:") as con:
|
|
70
|
+
if user_functions:
|
|
71
|
+
user_functions.register(con)
|
|
72
|
+
|
|
73
|
+
for name, value in dataframes.items():
|
|
74
|
+
con.register(name, value)
|
|
75
|
+
|
|
76
|
+
exception = None
|
|
77
|
+
for sample_size in [1000, 10000, 1000000]:
|
|
78
|
+
try:
|
|
79
|
+
con.execute(f"set global pandas_analyze_sample={sample_size};")
|
|
80
|
+
result_df = con.execute(query_str).fetchdf()
|
|
81
|
+
except InvalidInputException as e:
|
|
82
|
+
exception = e
|
|
83
|
+
else:
|
|
84
|
+
break
|
|
81
85
|
else:
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
+
raise exception
|
|
87
|
+
description = con.description
|
|
88
|
+
except Exception as e:
|
|
89
|
+
raise Exception(
|
|
90
|
+
format_db_error_message(db_type="DuckDB", db_error_msg=str(e), failed_query=query_str, is_external=False)
|
|
91
|
+
) from e
|
|
86
92
|
|
|
87
93
|
return result_df, description
|
|
88
94
|
|
|
89
95
|
|
|
96
|
+
_duckdb_functions_and_kw_list = None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_duckdb_functions_and_kw_list() -> list[str] | None:
|
|
100
|
+
"""Returns a list of all functions and keywords supported by DuckDB.
|
|
101
|
+
The list is merge of:
|
|
102
|
+
- list of duckdb's functions: 'select * from duckdb_functions()' or 'pragma functions'
|
|
103
|
+
- ist of keywords, because of some functions are just sintax-sugar
|
|
104
|
+
and not present in the duckdb_functions (like 'if()').
|
|
105
|
+
- hardcoded list of window_functions, because there are no way to get if from duckdb,
|
|
106
|
+
and they are not present in the duckdb_functions()
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
list[str] | None: List of supported functions and keywords, or None if unable to retrieve the list.
|
|
110
|
+
"""
|
|
111
|
+
global _duckdb_functions_and_kw_list
|
|
112
|
+
window_functions_list = [
|
|
113
|
+
"cume_dist",
|
|
114
|
+
"dense_rank",
|
|
115
|
+
"first_value",
|
|
116
|
+
"lag",
|
|
117
|
+
"last_value",
|
|
118
|
+
"lead",
|
|
119
|
+
"nth_value",
|
|
120
|
+
"ntile",
|
|
121
|
+
"percent_rank",
|
|
122
|
+
"rank_dense",
|
|
123
|
+
"rank",
|
|
124
|
+
"row_number",
|
|
125
|
+
]
|
|
126
|
+
if _duckdb_functions_and_kw_list is None:
|
|
127
|
+
try:
|
|
128
|
+
df, _ = query_df_with_type_infer_fallback(
|
|
129
|
+
"""
|
|
130
|
+
select distinct name
|
|
131
|
+
from (
|
|
132
|
+
select function_name as name from duckdb_functions()
|
|
133
|
+
union all
|
|
134
|
+
select keyword_name as name from duckdb_keywords()
|
|
135
|
+
) ta;
|
|
136
|
+
""",
|
|
137
|
+
dataframes={},
|
|
138
|
+
)
|
|
139
|
+
df.columns = [name.lower() for name in df.columns]
|
|
140
|
+
_duckdb_functions_and_kw_list = df["name"].drop_duplicates().str.lower().to_list() + window_functions_list
|
|
141
|
+
except Exception as e:
|
|
142
|
+
logger.warning(f"Unable to get DuckDB functions list: {e}")
|
|
143
|
+
|
|
144
|
+
return _duckdb_functions_and_kw_list
|
|
145
|
+
|
|
146
|
+
|
|
90
147
|
def query_df(df, query, session=None):
|
|
91
148
|
"""Perform simple query ('select' from one table, without subqueries and joins) on DataFrame.
|
|
92
149
|
|
|
@@ -100,8 +157,10 @@ def query_df(df, query, session=None):
|
|
|
100
157
|
|
|
101
158
|
if isinstance(query, str):
|
|
102
159
|
query_ast = parse_sql(query)
|
|
160
|
+
query_str = query
|
|
103
161
|
else:
|
|
104
162
|
query_ast = copy.deepcopy(query)
|
|
163
|
+
query_str = str(query)
|
|
105
164
|
|
|
106
165
|
if isinstance(query_ast, Select) is False or isinstance(query_ast.from_table, Identifier) is False:
|
|
107
166
|
raise Exception("Only 'SELECT from TABLE' statements supported for internal query")
|
|
@@ -125,6 +184,7 @@ def query_df(df, query, session=None):
|
|
|
125
184
|
return node
|
|
126
185
|
if isinstance(node, Function):
|
|
127
186
|
fnc_name = node.op.lower()
|
|
187
|
+
|
|
128
188
|
if fnc_name == "database" and len(node.args) == 0:
|
|
129
189
|
if session is not None:
|
|
130
190
|
cur_db = session.database
|
|
@@ -142,6 +202,22 @@ def query_df(df, query, session=None):
|
|
|
142
202
|
if user_functions is not None:
|
|
143
203
|
user_functions.check_function(node)
|
|
144
204
|
|
|
205
|
+
duckdb_functions_and_kw_list = get_duckdb_functions_and_kw_list() or []
|
|
206
|
+
custom_functions_list = [] if user_functions is None else list(user_functions.functions.keys())
|
|
207
|
+
all_functions_list = duckdb_functions_and_kw_list + custom_functions_list
|
|
208
|
+
if len(all_functions_list) > 0 and fnc_name not in all_functions_list:
|
|
209
|
+
raise Exception(
|
|
210
|
+
format_db_error_message(
|
|
211
|
+
db_type="DuckDB",
|
|
212
|
+
db_error_msg=(
|
|
213
|
+
f"Unknown function: '{fnc_name}'. This function is not recognized during internal query processing.\n"
|
|
214
|
+
"Please use DuckDB-supported functions instead."
|
|
215
|
+
),
|
|
216
|
+
failed_query=query_str,
|
|
217
|
+
is_external=False,
|
|
218
|
+
)
|
|
219
|
+
)
|
|
220
|
+
|
|
145
221
|
query_traversal(query_ast, adapt_query)
|
|
146
222
|
|
|
147
223
|
# convert json columns
|