MindsDB 25.7.4.0__py3-none-any.whl → 25.8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/executor/command_executor.py +9 -15
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +2 -1
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -2
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/salesforce_handler/constants.py +9 -2
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +83 -45
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +1 -3
- mindsdb/interfaces/database/projects.py +111 -7
- mindsdb/interfaces/knowledge_base/controller.py +7 -1
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +14 -15
- mindsdb/utilities/config.py +2 -0
- mindsdb/utilities/fs.py +54 -17
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +278 -263
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +49 -48
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.4.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
__title__ = "MindsDB"
|
|
2
2
|
__package_name__ = "mindsdb"
|
|
3
|
-
__version__ = "25.7.4.0"
|
|
3
|
+
__version__ = "25.8.2.0"
|
|
4
4
|
__description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
|
|
5
5
|
__email__ = "jorge@mindsdb.com"
|
|
6
6
|
__author__ = "MindsDB Inc"
|
mindsdb/__main__.py
CHANGED
|
@@ -39,7 +39,7 @@ from mindsdb.utilities.starters import (
|
|
|
39
39
|
)
|
|
40
40
|
from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids
|
|
41
41
|
import mindsdb.interfaces.storage.db as db
|
|
42
|
-
from mindsdb.utilities.fs import clean_process_marks, clean_unlinked_process_marks
|
|
42
|
+
from mindsdb.utilities.fs import clean_process_marks, clean_unlinked_process_marks, create_pid_file, delete_pid_file
|
|
43
43
|
from mindsdb.utilities.context import context as ctx
|
|
44
44
|
from mindsdb.utilities.auth import register_oauth_client, get_aws_meta_data
|
|
45
45
|
from mindsdb.utilities.sentry import sentry_sdk # noqa: F401
|
|
@@ -335,6 +335,13 @@ if __name__ == "__main__":
|
|
|
335
335
|
print(f"MindsDB {mindsdb_version}")
|
|
336
336
|
sys.exit(0)
|
|
337
337
|
|
|
338
|
+
if config.cmd_args.update_gui:
|
|
339
|
+
from mindsdb.api.http.initialize import initialize_static
|
|
340
|
+
|
|
341
|
+
logger.info("Updating the GUI version")
|
|
342
|
+
initialize_static()
|
|
343
|
+
sys.exit(0)
|
|
344
|
+
|
|
338
345
|
config.raise_warnings(logger=logger)
|
|
339
346
|
os.environ["MINDSDB_RUNTIME"] = "1"
|
|
340
347
|
|
|
@@ -510,6 +517,8 @@ if __name__ == "__main__":
|
|
|
510
517
|
if config.cmd_args.ml_task_queue_consumer is True:
|
|
511
518
|
trunc_processes_struct[TrunkProcessEnum.ML_TASK_QUEUE].need_to_run = True
|
|
512
519
|
|
|
520
|
+
create_pid_file()
|
|
521
|
+
|
|
513
522
|
for trunc_process_data in trunc_processes_struct.values():
|
|
514
523
|
if trunc_process_data.started is True or trunc_process_data.need_to_run is False:
|
|
515
524
|
continue
|
|
@@ -591,6 +600,7 @@ if __name__ == "__main__":
|
|
|
591
600
|
],
|
|
592
601
|
return_exceptions=False,
|
|
593
602
|
)
|
|
603
|
+
delete_pid_file()
|
|
594
604
|
|
|
595
605
|
ioloop = asyncio.new_event_loop()
|
|
596
606
|
ioloop.run_until_complete(wait_apis_start())
|
mindsdb/api/executor/command_executor.py
CHANGED
|
@@ -1195,11 +1195,17 @@ class ExecuteCommands:
|
|
|
1195
1195
|
msg = dedent(
|
|
1196
1196
|
f"""\
|
|
1197
1197
|
The '{handler_module_meta["name"]}' handler cannot be used. Reason is:
|
|
1198
|
-
{handler_module_meta["import"]["error_message"]}
|
|
1198
|
+
{handler_module_meta["import"]["error_message"] or msg}
|
|
1199
1199
|
"""
|
|
1200
1200
|
)
|
|
1201
1201
|
is_cloud = self.session.config.get("cloud", False)
|
|
1202
|
-
if is_cloud is False and "No module named" in handler_module_meta["import"]["error_message"]:
|
|
1202
|
+
if (
|
|
1203
|
+
is_cloud is False
|
|
1204
|
+
# NOTE: BYOM may raise these errors if there is an error in the user's code,
|
|
1205
|
+
# therefore error_message will be None
|
|
1206
|
+
and handler_module_meta["name"] != "byom"
|
|
1207
|
+
and "No module named" in handler_module_meta["import"]["error_message"]
|
|
1208
|
+
):
|
|
1203
1209
|
logger.info(get_handler_install_message(handler_module_meta["name"]))
|
|
1204
1210
|
ast_drop = DropMLEngine(name=Identifier(name))
|
|
1205
1211
|
self.answer_drop_ml_engine(ast_drop)
|
|
@@ -1342,24 +1348,12 @@ class ExecuteCommands:
|
|
|
1342
1348
|
from_table=NativeQuery(integration=statement.from_table, query=statement.query_str),
|
|
1343
1349
|
)
|
|
1344
1350
|
query_str = query.to_string()
|
|
1345
|
-
else:
|
|
1346
|
-
query = parse_sql(query_str)
|
|
1347
|
-
|
|
1348
|
-
if isinstance(query, Select):
|
|
1349
|
-
# check create view sql
|
|
1350
|
-
query.limit = Constant(1)
|
|
1351
|
-
|
|
1352
|
-
query_context_controller.set_context(query_context_controller.IGNORE_CONTEXT)
|
|
1353
|
-
try:
|
|
1354
|
-
SQLQuery(query, session=self.session, database=database_name)
|
|
1355
|
-
finally:
|
|
1356
|
-
query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
|
|
1357
1351
|
|
|
1358
1352
|
project = self.session.database_controller.get_project(project_name)
|
|
1359
1353
|
|
|
1360
1354
|
if isinstance(statement, CreateView):
|
|
1361
1355
|
try:
|
|
1362
|
-
project.create_view(view_name, query=query_str)
|
|
1356
|
+
project.create_view(view_name, query=query_str, session=self.session)
|
|
1363
1357
|
except EntityExistsError:
|
|
1364
1358
|
if getattr(statement, "if_not_exists", False) is False:
|
|
1365
1359
|
raise
|
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py
CHANGED
|
@@ -50,29 +50,26 @@ def get_table_alias(table_obj, default_db_name):
|
|
|
50
50
|
|
|
51
51
|
def get_fill_param_fnc(steps_data):
|
|
52
52
|
def fill_params(node, callstack=None, **kwargs):
|
|
53
|
-
if isinstance(node, Parameter):
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
rs = steps_data[node.value.step_num]
|
|
74
|
-
items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
|
|
75
|
-
return Tuple(items)
|
|
53
|
+
if not isinstance(node, Parameter):
|
|
54
|
+
return
|
|
55
|
+
|
|
56
|
+
rs = steps_data[node.value.step_num]
|
|
57
|
+
items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
|
|
58
|
+
|
|
59
|
+
is_single_item = True
|
|
60
|
+
if callstack:
|
|
61
|
+
node_prev = callstack[0]
|
|
62
|
+
if isinstance(node_prev, BinaryOperation):
|
|
63
|
+
# Check case: 'something IN Parameter()'
|
|
64
|
+
if node_prev.op.lower() == "in" and node_prev.args[1] is node:
|
|
65
|
+
is_single_item = False
|
|
66
|
+
|
|
67
|
+
if is_single_item and len(items) == 1:
|
|
68
|
+
# extract one value for option 'col=(subselect)'
|
|
69
|
+
node = items[0]
|
|
70
|
+
else:
|
|
71
|
+
node = Tuple(items)
|
|
72
|
+
return node
|
|
76
73
|
|
|
77
74
|
return fill_params
|
|
78
75
|
|
|
@@ -115,7 +112,7 @@ class FetchDataframeStepCall(BaseStepCall):
|
|
|
115
112
|
|
|
116
113
|
# if query registered, set progress
|
|
117
114
|
if self.sql_query.run_query is not None:
|
|
118
|
-
self.sql_query.run_query.set_progress(df
|
|
115
|
+
self.sql_query.run_query.set_progress(processed_rows=len(df))
|
|
119
116
|
return ResultSet.from_df(
|
|
120
117
|
df,
|
|
121
118
|
table_name=table_alias[1],
|
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py
CHANGED
|
@@ -97,6 +97,7 @@ class FetchDataframePartitionCall(BaseStepCall):
|
|
|
97
97
|
for df in run_query.get_partitions(self.dn, self, query):
|
|
98
98
|
try:
|
|
99
99
|
sub_data = self.exec_sub_steps(df)
|
|
100
|
+
run_query.set_progress(processed_rows=len(df))
|
|
100
101
|
results.append(sub_data)
|
|
101
102
|
except Exception as e:
|
|
102
103
|
if on_error == "skip":
|
|
@@ -175,17 +176,22 @@ class FetchDataframePartitionCall(BaseStepCall):
|
|
|
175
176
|
# split into chunks and send to workers
|
|
176
177
|
futures = []
|
|
177
178
|
for df2 in split_data_frame(df, partition_size):
|
|
178
|
-
futures.append(executor.submit(self.exec_sub_steps, df2))
|
|
179
|
+
futures.append([executor.submit(self.exec_sub_steps, df2), len(df2)])
|
|
179
180
|
|
|
180
|
-
|
|
181
|
+
error = None
|
|
182
|
+
for future, rows_count in futures:
|
|
181
183
|
try:
|
|
182
184
|
results.append(future.result())
|
|
185
|
+
run_query.set_progress(processed_rows=rows_count)
|
|
183
186
|
except Exception as e:
|
|
184
187
|
if on_error == "skip":
|
|
185
188
|
logger.error(e)
|
|
186
189
|
else:
|
|
187
190
|
executor.shutdown()
|
|
188
|
-
|
|
191
|
+
error = e
|
|
192
|
+
|
|
193
|
+
if error:
|
|
194
|
+
raise error
|
|
189
195
|
if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
|
|
190
196
|
executor.shutdown()
|
|
191
197
|
raise RuntimeError("Query is interrupted")
|
mindsdb/api/executor/sql_query/steps/subselect_step.py
CHANGED
|
@@ -2,7 +2,15 @@ from collections import defaultdict
|
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
|
-
from mindsdb_sql_parser.ast import Identifier, Select, Star, Constant, Parameter, Function, Variable, BinaryOperation
|
|
5
|
+
from mindsdb_sql_parser.ast import (
|
|
6
|
+
Identifier,
|
|
7
|
+
Select,
|
|
8
|
+
Star,
|
|
9
|
+
Constant,
|
|
10
|
+
Function,
|
|
11
|
+
Variable,
|
|
12
|
+
BinaryOperation,
|
|
13
|
+
)
|
|
6
14
|
|
|
7
15
|
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SERVER_VARIABLES
|
|
8
16
|
from mindsdb.api.executor.planner.step_result import Result
|
|
@@ -52,13 +60,8 @@ class SubSelectStepCall(BaseStepCall):
|
|
|
52
60
|
|
|
53
61
|
# inject previous step values
|
|
54
62
|
if isinstance(query, Select):
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
if isinstance(node, Parameter) and isinstance(node.value, Result):
|
|
58
|
-
prev_result = self.steps_data[node.value.step_num]
|
|
59
|
-
return Constant(prev_result.get_column_values(col_idx=0)[0])
|
|
60
|
-
|
|
61
|
-
query_traversal(query, inject_values)
|
|
63
|
+
fill_params = get_fill_param_fnc(self.steps_data)
|
|
64
|
+
query_traversal(query, fill_params)
|
|
62
65
|
|
|
63
66
|
df = result.to_df()
|
|
64
67
|
res = query_df(df, query, session=self.session)
|
mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
from mindsdb_sql_parser.ast import Identifier, Function, Constant, BinaryOperation
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def adapt_char_fn(node: Function) -> Function | None:
|
|
5
|
+
"""Replace MySQL's multy-arg CHAR call to chain of DuckDB's CHR calls
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
CHAR(77, 78, 79) => CHR(77) || CHR(78) || CHR(79)
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
node (Function): Function node to adapt
|
|
12
|
+
|
|
13
|
+
Returns:
|
|
14
|
+
Function | None: Adapted function node
|
|
15
|
+
"""
|
|
16
|
+
if len(node.args) == 1:
|
|
17
|
+
node.op = "chr"
|
|
18
|
+
return node
|
|
19
|
+
|
|
20
|
+
acc = None
|
|
21
|
+
for arg in node.args:
|
|
22
|
+
fn = Function(op="chr", args=[arg])
|
|
23
|
+
if acc is None:
|
|
24
|
+
acc = fn
|
|
25
|
+
continue
|
|
26
|
+
acc = BinaryOperation("||", args=[acc, fn])
|
|
27
|
+
|
|
28
|
+
acc.parentheses = True
|
|
29
|
+
acc.alias = node.alias
|
|
30
|
+
return acc
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def adapt_locate_fn(node: Function) -> Function | None:
|
|
34
|
+
"""Replace MySQL's LOCATE (or INSTR) call to DuckDB's STRPOS call
|
|
35
|
+
|
|
36
|
+
Example:
|
|
37
|
+
LOCATE('bar', 'foobarbar') => STRPOS('foobarbar', 'bar')
|
|
38
|
+
INSTR('foobarbar', 'bar') => STRPOS('foobarbar', 'bar')
|
|
39
|
+
LOCATE('bar', 'foobarbar', 3) => ValueError (there is no analogue in DuckDB)
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
node (Function): Function node to adapt
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Function | None: Adapted function node
|
|
46
|
+
|
|
47
|
+
Raises:
|
|
48
|
+
ValueError: If the function has 3 arguments
|
|
49
|
+
"""
|
|
50
|
+
if len(node.args) == 3:
|
|
51
|
+
raise ValueError("MySQL LOCATE function with 3 arguments is not supported")
|
|
52
|
+
if node.op == "locate":
|
|
53
|
+
node.args = [node.args[1], node.args[0]]
|
|
54
|
+
elif node.op == "insrt":
|
|
55
|
+
node.args = [node.args[0], node.args[1]]
|
|
56
|
+
node.op = "strpos"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def adapt_unhex_fn(node: Function) -> None:
|
|
60
|
+
"""Check MySQL's UNHEX function call arguments to ensure they are strings,
|
|
61
|
+
because DuckDB's UNHEX accepts only string arguments, while MySQL's UNHEX can accept integer arguments.
|
|
62
|
+
NOTE: if return dataframe from duckdb then unhex values are array - this may be an issue
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
node (Function): Function node to adapt
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
None
|
|
69
|
+
|
|
70
|
+
Raises:
|
|
71
|
+
ValueError: If the function argument is not a string
|
|
72
|
+
"""
|
|
73
|
+
for arg in node.args:
|
|
74
|
+
if not isinstance(arg, (str, bytes)):
|
|
75
|
+
raise ValueError("MySQL UNHEX function argument must be a string")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def adapt_format_fn(node: Function) -> None:
|
|
79
|
+
"""Adapt MySQL's FORMAT function to DuckDB's FORMAT function
|
|
80
|
+
|
|
81
|
+
Example:
|
|
82
|
+
FORMAT(1234567.89, 0) => FORMAT('{:,.0f}', 1234567.89)
|
|
83
|
+
FORMAT(1234567.89, 2) => FORMAT('{:,.2f}', 1234567.89)
|
|
84
|
+
FORMAT(name, 2) => FORMAT('{:,.2f}', name)
|
|
85
|
+
FORMAT('{:.2f}', 1234567.89) => FORMAT('{:,.2f}', 1234567.89) # no changes for original style
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
node (Function): Function node to adapt
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
None
|
|
92
|
+
|
|
93
|
+
Raises:
|
|
94
|
+
ValueError: If MySQL's function has 3rd 'locale' argument, like FORMAT(12332.2, 2, 'de_DE')
|
|
95
|
+
"""
|
|
96
|
+
match node.args[0], node.args[1]:
|
|
97
|
+
case Constant(value=(int() | float())), Constant(value=int()):
|
|
98
|
+
...
|
|
99
|
+
case Identifier(), Constant(value=int()):
|
|
100
|
+
...
|
|
101
|
+
case _:
|
|
102
|
+
return node
|
|
103
|
+
|
|
104
|
+
if len(node.args) > 2:
|
|
105
|
+
raise ValueError("'locale' argument of 'format' function is not supported")
|
|
106
|
+
decimal_places = node.args[1].value
|
|
107
|
+
|
|
108
|
+
if isinstance(node.args[0], Constant):
|
|
109
|
+
node.args[1].value = node.args[0].value
|
|
110
|
+
node.args[0].value = f"{{:,.{decimal_places}f}}"
|
|
111
|
+
else:
|
|
112
|
+
node.args[1] = node.args[0]
|
|
113
|
+
node.args[0] = Constant(f"{{:,.{decimal_places}f}}")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def adapt_sha2_fn(node: Function) -> None:
|
|
117
|
+
"""Adapt MySQL's SHA2 function to DuckDB's SHA256 function
|
|
118
|
+
|
|
119
|
+
Example:
|
|
120
|
+
SHA2('test', 256) => SHA256('test')
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
node (Function): Function node to adapt
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
None
|
|
127
|
+
|
|
128
|
+
Raises:
|
|
129
|
+
ValueError: If the function has more than 1 argument or the argument is not 256
|
|
130
|
+
"""
|
|
131
|
+
if len(node.args) > 1 and node.args[1].value != 256:
|
|
132
|
+
raise ValueError("Only sha256 is supported")
|
|
133
|
+
node.op = "sha256"
|
|
134
|
+
node.args = [node.args[0]]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def adapt_length_fn(node: Function) -> None:
|
|
138
|
+
"""Adapt MySQL's LENGTH function to DuckDB's STRLEN function
|
|
139
|
+
NOTE: duckdb also have LENGTH, therefore it can not be used
|
|
140
|
+
|
|
141
|
+
Example:
|
|
142
|
+
LENGTH('test') => STRLEN('test')
|
|
143
|
+
|
|
144
|
+
Args:
|
|
145
|
+
node (Function): Function node to adapt
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
None
|
|
149
|
+
"""
|
|
150
|
+
node.op = "strlen"
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def adapt_regexp_substr_fn(node: Function) -> None:
|
|
154
|
+
"""Adapt MySQL's REGEXP_SUBSTR function to DuckDB's REGEXP_EXTRACT function
|
|
155
|
+
|
|
156
|
+
Example:
|
|
157
|
+
REGEXP_SUBSTR('foobarbar', 'bar', 1, 1) => REGEXP_EXTRACT('foobarbar', 'bar')
|
|
158
|
+
|
|
159
|
+
Args:
|
|
160
|
+
node (Function): Function node to adapt
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
None
|
|
164
|
+
|
|
165
|
+
Raises:
|
|
166
|
+
ValueError: If the function has more than 2 arguments or 3rd or 4th argument is not 1
|
|
167
|
+
"""
|
|
168
|
+
if (
|
|
169
|
+
len(node.args) == 3
|
|
170
|
+
and node.args[2].value != 1
|
|
171
|
+
or len(node.args) == 4
|
|
172
|
+
and (node.args[3].value != 1 or node.args[2].value != 1)
|
|
173
|
+
or len(node.args) > 4
|
|
174
|
+
):
|
|
175
|
+
raise ValueError("Only 2 arguments are supported for REGEXP_SUBSTR function")
|
|
176
|
+
node.args = node.args[:2]
|
|
177
|
+
node.op = "regexp_extract"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def adapt_substring_index_fn(node: Function) -> BinaryOperation | Function:
|
|
181
|
+
"""Adapt MySQL's SUBSTRING_INDEX function to DuckDB's SPLIT_PART function
|
|
182
|
+
|
|
183
|
+
Example:
|
|
184
|
+
SUBSTRING_INDEX('a.b.c.d', '.', 1) => SPLIT_PART('a.b.c.d', '.', 1)
|
|
185
|
+
SUBSTRING_INDEX('a.b.c.d', '.', 2) => CONCAT_WS('.', SPLIT_PART('a.b.c.d', '.', 1), SPLIT_PART('a.b.c.d', '.', 2))
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
node (Function): Function node to adapt
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
BinaryOperation | Function: Binary operation node or function node
|
|
192
|
+
|
|
193
|
+
Raises:
|
|
194
|
+
ValueError: If the function has more than 3 arguments or the 3rd argument is not 1
|
|
195
|
+
"""
|
|
196
|
+
if len(node.args[1].value) > 1:
|
|
197
|
+
raise ValueError("Only one car in separator")
|
|
198
|
+
|
|
199
|
+
if node.args[2].value == 1:
|
|
200
|
+
node.op = "split_part"
|
|
201
|
+
return node
|
|
202
|
+
|
|
203
|
+
acc = [node.args[1]]
|
|
204
|
+
for i in range(node.args[2].value):
|
|
205
|
+
fn = Function(op="split_part", args=[node.args[0], node.args[1], Constant(i + 1)])
|
|
206
|
+
acc.append(fn)
|
|
207
|
+
|
|
208
|
+
acc = Function(op="concat_ws", args=acc)
|
|
209
|
+
acc.alias = node.alias
|
|
210
|
+
return acc
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def adapt_curtime_fn(node: Function) -> BinaryOperation:
|
|
214
|
+
"""Adapt MySQL's CURTIME function to DuckDB's GET_CURRENT_TIME function.
|
|
215
|
+
To get the same type as MySQL's CURTIME function, we need to cast the result to time type.
|
|
216
|
+
|
|
217
|
+
Example:
|
|
218
|
+
CURTIME() => GET_CURRENT_TIME()::time
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
node (Function): Function node to adapt
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
BinaryOperation: Binary operation node
|
|
225
|
+
"""
|
|
226
|
+
return BinaryOperation("::", args=[Function(op="get_current_time", args=[]), Identifier("time")], alias=node.alias)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def adapt_timestampdiff_fn(node: Function) -> None:
|
|
230
|
+
"""Adapt MySQL's TIMESTAMPDIFF function to DuckDB's DATE_DIFF function
|
|
231
|
+
NOTE: Looks like cast string args to timestamp works in most cases, but there may be some exceptions.
|
|
232
|
+
|
|
233
|
+
Example:
|
|
234
|
+
TIMESTAMPDIFF(YEAR, '2000-02-01', '2003-05-01') => DATE_DIFF('year', timestamp '2000-02-01', timestamp '2003-05-01')
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
node (Function): Function node to adapt
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
None
|
|
241
|
+
"""
|
|
242
|
+
node.op = "date_diff"
|
|
243
|
+
node.args[0] = Constant(node.args[0].parts[0])
|
|
244
|
+
node.args[1] = BinaryOperation(" ", args=[Identifier("timestamp"), node.args[1]])
|
|
245
|
+
node.args[2] = BinaryOperation(" ", args=[Identifier("timestamp"), node.args[2]])
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def adapt_extract_fn(node: Function) -> None:
|
|
249
|
+
"""Adapt MySQL's EXTRACT function to DuckDB's EXTRACT function
|
|
250
|
+
TODO: multi-part args, like YEAR_MONTH, is not supported yet
|
|
251
|
+
NOTE: Looks like adding 'timestamp' works in most cases, but there may be some exceptions.
|
|
252
|
+
|
|
253
|
+
Example:
|
|
254
|
+
EXTRACT(YEAR FROM '2000-02-01') => EXTRACT('year' from timestamp '2000-02-01')
|
|
255
|
+
|
|
256
|
+
Args:
|
|
257
|
+
node (Function): Function node to adapt
|
|
258
|
+
|
|
259
|
+
Returns:
|
|
260
|
+
None
|
|
261
|
+
"""
|
|
262
|
+
node.args[0] = Constant(node.args[0].parts[0])
|
|
263
|
+
if not isinstance(node.from_arg, Identifier):
|
|
264
|
+
node.from_arg = BinaryOperation(" ", args=[Identifier("timestamp"), node.from_arg])
|
mindsdb/api/executor/utilities/sql.py
CHANGED
|
@@ -14,6 +14,19 @@ from mindsdb.utilities.exception import format_db_error_message
|
|
|
14
14
|
from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier
|
|
15
15
|
from mindsdb.utilities.json_encoder import CustomJSONEncoder
|
|
16
16
|
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
|
|
17
|
+
from mindsdb.api.executor.utilities.mysql_to_duckdb_functions import (
|
|
18
|
+
adapt_char_fn,
|
|
19
|
+
adapt_locate_fn,
|
|
20
|
+
adapt_unhex_fn,
|
|
21
|
+
adapt_format_fn,
|
|
22
|
+
adapt_sha2_fn,
|
|
23
|
+
adapt_length_fn,
|
|
24
|
+
adapt_regexp_substr_fn,
|
|
25
|
+
adapt_substring_index_fn,
|
|
26
|
+
adapt_curtime_fn,
|
|
27
|
+
adapt_timestampdiff_fn,
|
|
28
|
+
adapt_extract_fn,
|
|
29
|
+
)
|
|
17
30
|
|
|
18
31
|
logger = log.getLogger(__name__)
|
|
19
32
|
|
|
@@ -185,6 +198,23 @@ def query_df(df, query, session=None):
|
|
|
185
198
|
if isinstance(node, Function):
|
|
186
199
|
fnc_name = node.op.lower()
|
|
187
200
|
|
|
201
|
+
mysql_to_duck_fn_map = {
|
|
202
|
+
"char": adapt_char_fn,
|
|
203
|
+
"locate": adapt_locate_fn,
|
|
204
|
+
"insrt": adapt_locate_fn,
|
|
205
|
+
"unhex": adapt_unhex_fn,
|
|
206
|
+
"format": adapt_format_fn,
|
|
207
|
+
"sha2": adapt_sha2_fn,
|
|
208
|
+
"length": adapt_length_fn,
|
|
209
|
+
"regexp_substr": adapt_regexp_substr_fn,
|
|
210
|
+
"substring_index": adapt_substring_index_fn,
|
|
211
|
+
"curtime": adapt_curtime_fn,
|
|
212
|
+
"timestampdiff": adapt_timestampdiff_fn,
|
|
213
|
+
"extract": adapt_extract_fn,
|
|
214
|
+
}
|
|
215
|
+
if fnc_name in mysql_to_duck_fn_map:
|
|
216
|
+
return mysql_to_duck_fn_map[fnc_name](node)
|
|
217
|
+
|
|
188
218
|
if fnc_name == "database" and len(node.args) == 0:
|
|
189
219
|
if session is not None:
|
|
190
220
|
cur_db = session.database
|