MindsDB 25.7.3.0__py3-none-any.whl → 25.8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +215 -150
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +18 -44
- mindsdb/api/http/namespaces/agents.py +23 -20
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -77
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/salesforce_handler/constants.py +215 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +246 -149
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +11 -6
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +174 -23
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +33 -6
- mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +111 -145
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +5 -3
- mindsdb/utilities/fs.py +54 -17
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +282 -268
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +94 -92
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py:

@@ -26,38 +26,28 @@ from .base import BaseStepCall
 
 
 def get_preditor_alias(step, mindsdb_database):
-    predictor_name = '.'.join(step.predictor.parts)
-    predictor_alias = '.'.join(step.predictor.alias.parts) if step.predictor.alias is not None else predictor_name
+    predictor_name = ".".join(step.predictor.parts)
+    predictor_alias = ".".join(step.predictor.alias.parts) if step.predictor.alias is not None else predictor_name
     return (mindsdb_database, predictor_name, predictor_alias)
 
 
 class ApplyPredictorBaseCall(BaseStepCall):
-
     def apply_predictor(self, project_name, predictor_name, df, version, params):
         # is it an agent?
        agent = self.session.agents_controller.get_agent(predictor_name, project_name)
         if agent is not None:
-
-            messages = df.to_dict('records')
+            messages = df.to_dict("records")
             predictions = self.session.agents_controller.get_completion(
-                agent,
-                messages=messages,
-                project_name=project_name,
+                agent, messages=messages, project_name=project_name, params=params
             )
 
         else:
             project_datanode = self.session.datahub.get(project_name)
-            predictions = project_datanode.predict(
-                model_name=predictor_name,
-                df=df,
-                version=version,
-                params=params
-            )
+            predictions = project_datanode.predict(model_name=predictor_name, df=df, version=version, params=params)
         return predictions
 
 
 class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
-
     bind = ApplyPredictorRowStep
 
     def call(self, step):
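Aside: the rewritten `get_preditor_alias` resolves a dotted model name and falls back to it when no alias is given. A minimal sketch of that rule, with `resolve_alias` as a hypothetical stand-in for the step/AST plumbing:

    def resolve_alias(parts, alias_parts=None):
        # Dotted model name, e.g. ["proj", "my_model"] -> "proj.my_model"
        name = ".".join(parts)
        # Fall back to the model name when no alias was given
        alias = ".".join(alias_parts) if alias_parts is not None else name
        return name, alias

    assert resolve_alias(["proj", "model"]) == ("proj.model", "proj.model")
    assert resolve_alias(["proj", "model"], ["m"]) == ("proj.model", "m")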
@@ -89,7 +79,7 @@ class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
         for k, v in where_data.items():
             predictions[k] = v
 
-        table_name = get_preditor_alias(step, self.context.get('database'))
+        table_name = get_preditor_alias(step, self.context.get("database"))
 
         if len(predictions) == 0:
             columns_names = project_datanode.get_table_columns_names(predictor_name)
@@ -100,12 +90,11 @@ class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
             database=table_name[0],
             table_name=table_name[1],
             table_alias=table_name[2],
-            is_prediction=True
+            is_prediction=True,
         )
 
 
 class ApplyPredictorStepCall(ApplyPredictorBaseCall):
-
     bind = ApplyPredictorStep
 
     def call(self, step):
@@ -115,20 +104,20 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
         params = step.params or {}
 
         # adding __mindsdb_row_id, use first table if exists
-        if len(data.find_columns('__mindsdb_row_id')) == 0:
+        if len(data.find_columns("__mindsdb_row_id")) == 0:
             table = data.get_tables()[0] if len(data.get_tables()) > 0 else None
 
             row_id_col = Column(
-                name='__mindsdb_row_id',
-                database=table['database'] if table is not None else None,
-                table_name=table['table_name'] if table is not None else None,
-                table_alias=table['table_alias'] if table is not None else None,
+                name="__mindsdb_row_id",
+                database=table["database"] if table is not None else None,
+                table_name=table["table_name"] if table is not None else None,
+                table_alias=table["table_alias"] if table is not None else None,
             )
 
-            row_id = self.context.get('row_id')
+            row_id = self.context.get("row_id")
             values = range(row_id, row_id + data.length())
             data.add_column(row_id_col, values)
-            self.context['row_id'] += data.length()
+            self.context["row_id"] += data.length()
 
         project_name = step.namespace
         predictor_name = step.predictor.parts[0]
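Aside: the `__mindsdb_row_id` block keeps a running counter in the executor context so row ids stay unique across batches. The pattern in isolation, with a plain dict standing in for the context (`add_row_ids` is a hypothetical helper):

    def add_row_ids(context, rows):
        # Assign consecutive ids starting from the current counter,
        # then advance the counter so the next batch continues the sequence.
        start = context["row_id"]
        ids = list(range(start, start + len(rows)))
        context["row_id"] += len(rows)
        return ids

    ctx = {"row_id": 0}
    print(add_row_ids(ctx, ["a", "b", "c"]))  # [0, 1, 2]
    print(add_row_ids(ctx, ["d"]))            # [3]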
@@ -143,47 +132,46 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             data.set_column_values(k, v)
 
         predictor_metadata = {}
-        for pm in self.context['predictor_metadata']:
-            if pm['name'] == predictor_name and pm['integration_name'].lower() == project_name:
+        for pm in self.context["predictor_metadata"]:
+            if pm["name"] == predictor_name and pm["integration_name"].lower() == project_name:
                 predictor_metadata = pm
                 break
-        is_timeseries = predictor_metadata['timeseries']
+        is_timeseries = predictor_metadata["timeseries"]
         _mdb_forecast_offset = None
         if is_timeseries:
-            if '> LATEST' in self.context['query_str']:
+            if "> LATEST" in self.context["query_str"]:
                 # stream mode -- if > LATEST, forecast starts on inferred next timestamp
                 _mdb_forecast_offset = 1
-            elif '= LATEST' in self.context['query_str']:
+            elif "= LATEST" in self.context["query_str"]:
                 # override: when = LATEST, forecast starts on last provided timestamp instead of inferred next time
                 _mdb_forecast_offset = 0
             else:
                 # normal mode -- emit a forecast ($HORIZON data points on each) for each provided timestamp
-                params['force_ts_infer'] = True
+                params["force_ts_infer"] = True
                 _mdb_forecast_offset = None
 
-            data.add_column(Column(name='__mdb_forecast_offset'), _mdb_forecast_offset)
+            data.add_column(Column(name="__mdb_forecast_offset"), _mdb_forecast_offset)
 
-        table_name = get_preditor_alias(step, self.context['database'])
+        table_name = get_preditor_alias(step, self.context["database"])
 
         project_datanode = self.session.datahub.get(project_name)
         if len(data) == 0:
-            columns_names = project_datanode.get_table_columns_names(predictor_name) + ['__mindsdb_row_id']
+            columns_names = project_datanode.get_table_columns_names(predictor_name) + ["__mindsdb_row_id"]
             result = ResultSet(is_prediction=True)
             for column_name in columns_names:
-                result.add_column(Column(
-                    name=column_name,
-                    database=table_name[0],
-                    table_name=table_name[1],
-                    table_alias=table_name[2]
-                ))
+                result.add_column(
+                    Column(
+                        name=column_name, database=table_name[0], table_name=table_name[1], table_alias=table_name[2]
+                    )
+                )
         else:
-            predictor_id = predictor_metadata['id']
+            predictor_id = predictor_metadata["id"]
             table_df = data.to_df()
 
             if self.session.predictor_cache is not False:
-                key = f'{predictor_name}_{predictor_id}_{dataframe_checksum(table_df)}'
+                key = f"{predictor_name}_{predictor_id}_{dataframe_checksum(table_df)}"
 
-                predictor_cache = get_cache('predict')
+                predictor_cache = get_cache("predict")
                 predictions = predictor_cache.get(key)
             else:
                 predictions = None
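Aside: the cache branch keys predictions on model name, model id, and a checksum of the input frame, so a repeated call with identical input can reuse cached predictions. A sketch of that keying, assuming a content hash; MindsDB's actual `dataframe_checksum` may hash differently:

    import hashlib
    import pandas as pd

    def dataframe_checksum(df: pd.DataFrame) -> str:
        # Hash a stable serialization of the frame's content.
        return hashlib.sha256(df.to_csv(index=False).encode()).hexdigest()

    def cache_key(predictor_name: str, predictor_id: int, df: pd.DataFrame) -> str:
        return f"{predictor_name}_{predictor_id}_{dataframe_checksum(df)}"

    df = pd.DataFrame({"x": [1, 2, 3]})
    key = cache_key("my_model", 42, df)
    # Same content -> same key -> cached predictions can be reused.
    assert key == cache_key("my_model", 42, df.copy())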
@@ -221,7 +209,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
 
         # apply filter
         if is_timeseries:
-            pred_data = predictions.to_dict(orient='records')
+            pred_data = predictions.to_dict(orient="records")
             where_data = list(data.get_records())
             pred_data = self.apply_ts_filter(pred_data, where_data, step, predictor_metadata)
             predictions = pd.DataFrame(pred_data)
@@ -231,37 +219,33 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             database=table_name[0],
             table_name=table_name[1],
             table_alias=table_name[2],
-            is_prediction=True
+            is_prediction=True,
         )
 
         return result
 
     def apply_ts_filter(self, predictor_data, table_data, step, predictor_metadata):
-
         if step.output_time_filter is None:
             # no filter, exit
             return predictor_data
 
         # apply filter
-        group_cols = predictor_metadata['group_by_columns']
-        order_col = predictor_metadata['order_by_column']
+        group_cols = predictor_metadata["group_by_columns"]
+        order_col = predictor_metadata["order_by_column"]
 
         filter_args = step.output_time_filter.args
         filter_op = step.output_time_filter.op
 
         # filter field must be order column
-        if not (
-            isinstance(filter_args[0], Identifier)
-            and filter_args[0].parts[-1] == order_col
-        ):
+        if not (isinstance(filter_args[0], Identifier) and filter_args[0].parts[-1] == order_col):
             # exit otherwise
             return predictor_data
 
         def get_date_format(samples):
             # Try common formats first with explicit patterns
             for date_format, pattern in (
-                ('%Y-%m-%d', r'[\d]{4}-[\d]{2}-[\d]{2}'),
-                ('%Y-%m-%d %H:%M:%S', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}'),
+                ("%Y-%m-%d", r"[\d]{4}-[\d]{2}-[\d]{2}"),
+                ("%Y-%m-%d %H:%M:%S", r"[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}"),
                 # ('%Y-%m-%d %H:%M:%S%z', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}\+[\d]{2}:[\d]{2}'),
                 # ('%Y', '[\d]{4}')
             ):
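Aside: `get_date_format` tries explicit regex patterns before falling back to `dateparser`. The pattern-first step in isolation (`guess_date_format` is hypothetical; the original's matching details may differ):

    import re

    def guess_date_format(sample: str):
        # Try common formats first with explicit patterns.
        for date_format, pattern in (
            ("%Y-%m-%d", r"\d{4}-\d{2}-\d{2}"),
            ("%Y-%m-%d %H:%M:%S", r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"),
        ):
            if re.fullmatch(pattern, sample):
                return date_format
        return None  # caller falls back to dateparser-based inference

    assert guess_date_format("2024-01-31") == "%Y-%m-%d"
    assert guess_date_format("2024-01-31 12:00:00") == "%Y-%m-%d %H:%M:%S"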
@@ -281,6 +265,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             # Parse the first sample to get its format
             # The import is heavy, so we do it here on-demand
             import dateparser
+
             parsed_date = dateparser.parse(samples[0])
             if parsed_date is None:
                 raise ValueError("Could not parse date")
@@ -290,25 +275,21 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
                     if dateparser.parse(sample) is None:
                         raise ValueError("Inconsistent date formats in samples")
                 # Convert to strftime format based on the input
-                if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
-                    return '%Y-%m-%d %H:%M:%S'
-                return '%Y-%m-%d'
+                if re.search(r"\d{2}:\d{2}:\d{2}", samples[0]):
+                    return "%Y-%m-%d %H:%M:%S"
+                return "%Y-%m-%d"
             except (ValueError, AttributeError):
                 # If dateparser fails, return a basic format as last resort
-                return '%Y-%m-%d'
+                return "%Y-%m-%d"
 
-        model_types = predictor_metadata['model_types']
-        if model_types.get(order_col) in ('float', 'integer'):
+        model_types = predictor_metadata["model_types"]
+        if model_types.get(order_col) in ("float", "integer"):
             # convert strings to digits
-            fnc = {
-                'integer': int,
-                'float': float
-            }[model_types[order_col]]
+            fnc = {"integer": int, "float": float}[model_types[order_col]]
 
             # convert predictor_data
             if len(predictor_data) > 0:
                 if isinstance(predictor_data[0][order_col], str):
-
                     for row in predictor_data:
                         row[order_col] = fnc(row[order_col])
                 elif isinstance(predictor_data[0][order_col], dt.date):
@@ -318,7 +299,6 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
 
             # convert predictor_data
             if isinstance(table_data[0][order_col], str):
-
                 for row in table_data:
                     row[order_col] = fnc(row[order_col])
             elif isinstance(table_data[0][order_col], dt.date):
@@ -327,18 +307,13 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
                     row[order_col] = fnc(row[order_col])
 
             # convert args to date
-            samples = [
-                arg.value
-                for arg in filter_args
-                if isinstance(arg, Constant) and isinstance(arg.value, str)
-            ]
+            samples = [arg.value for arg in filter_args if isinstance(arg, Constant) and isinstance(arg.value, str)]
             if len(samples) > 0:
-
                 for arg in filter_args:
                     if isinstance(arg, Constant) and isinstance(arg.value, str):
                         arg.value = fnc(arg.value)
 
-        if model_types.get(order_col) in ('date', 'datetime') or isinstance(predictor_data[0][order_col], pd.Timestamp):
+        if model_types.get(order_col) in ("date", "datetime") or isinstance(predictor_data[0][order_col], pd.Timestamp):  # noqa
             # convert strings to date
             # it is making side effect on original data by changing it but let it be
 
@@ -364,11 +339,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             _cast_samples(table_data, order_col)
 
             # convert args to date
-            samples = [
-                arg.value
-                for arg in filter_args
-                if isinstance(arg, Constant) and isinstance(arg.value, str)
-            ]
+            samples = [arg.value for arg in filter_args if isinstance(arg, Constant) and isinstance(arg.value, str)]
             if len(samples) > 0:
                 date_format = get_date_format(samples)
 
@@ -380,7 +351,6 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
         # first pass: get max values for Latest in table data
         latest_vals = {}
         if Latest() in filter_args:
-
             for row in table_data:
                 if group_cols is None:
                     key = 0  # the same for any value
@@ -400,11 +370,11 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
                 data2.append(row)
         elif isinstance(step.output_time_filter, BinaryOperation):
             op_map = {
-                '<': '__lt__',
-                '<=': '__le__',
-                '>': '__gt__',
-                '>=': '__ge__',
-                '=': '__eq__',
+                "<": "__lt__",
+                "<=": "__le__",
+                ">": "__gt__",
+                ">=": "__ge__",
+                "=": "__eq__",
             }
             arg = filter_args[1]
             if isinstance(arg, Latest):
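Aside: `op_map` maps SQL comparison operators to Python dunder method names, so the time filter can be applied with a plain attribute lookup. The dispatch in isolation:

    op_map = {
        "<": "__lt__",
        "<=": "__le__",
        ">": "__gt__",
        ">=": "__ge__",
        "=": "__eq__",
    }

    def apply_op(left, op, right):
        # getattr(left, "__lt__")(right) is equivalent to left < right
        return getattr(left, op_map[op])(right)

    rows = [{"t": 1}, {"t": 2}, {"t": 3}]
    filtered = [r for r in rows if apply_op(r["t"], ">=", 2)]
    print(filtered)  # [{'t': 2}, {'t': 3}]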
@@ -435,5 +405,4 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
 
 
 class ApplyTimeseriesPredictorStepCall(ApplyPredictorStepCall):
-
     bind = ApplyTimeseriesPredictorStep
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py:

@@ -50,29 +50,26 @@ def get_table_alias(table_obj, default_db_name):
 
 def get_fill_param_fnc(steps_data):
     def fill_params(node, callstack=None, **kwargs):
-        if isinstance(node, Parameter):
-            …
-            rs = steps_data[node.value.step_num]
-            items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
-            return Tuple(items)
+        if not isinstance(node, Parameter):
+            return
+
+        rs = steps_data[node.value.step_num]
+        items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
+
+        is_single_item = True
+        if callstack:
+            node_prev = callstack[0]
+            if isinstance(node_prev, BinaryOperation):
+                # Check case: 'something IN Parameter()'
+                if node_prev.op.lower() == "in" and node_prev.args[1] is node:
+                    is_single_item = False
+
+        if is_single_item and len(items) == 1:
+            # extract one value for option 'col=(subselect)'
+            node = items[0]
+        else:
+            node = Tuple(items)
+        return node
 
     return fill_params
 
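Aside: the rewritten `fill_params` collapses a one-row subselect into a scalar constant unless the parameter sits on the right side of an `IN`, which must stay a tuple. The decision rule sketched with plain values instead of AST nodes (`substitute` is hypothetical):

    def substitute(values, parent_op=None, is_rhs_of_in=False):
        # One value and not inside "col IN (...)": use the scalar,
        # so "col = (subselect)" compares against a single constant.
        if len(values) == 1 and not (parent_op == "in" and is_rhs_of_in):
            return values[0]
        # Otherwise keep a tuple, as "col IN (subselect)" requires.
        return tuple(values)

    print(substitute([5], parent_op="="))                      # 5
    print(substitute([5], parent_op="in", is_rhs_of_in=True))  # (5,)
    print(substitute([1, 2], parent_op="="))                   # (1, 2)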
@@ -115,7 +112,7 @@ class FetchDataframeStepCall(BaseStepCall):
 
         # if query registered, set progress
         if self.sql_query.run_query is not None:
-            self.sql_query.run_query.set_progress(df…
+            self.sql_query.run_query.set_progress(processed_rows=len(df))
         return ResultSet.from_df(
             df,
             table_name=table_alias[1],
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py:

@@ -97,6 +97,7 @@ class FetchDataframePartitionCall(BaseStepCall):
         for df in run_query.get_partitions(self.dn, self, query):
             try:
                 sub_data = self.exec_sub_steps(df)
+                run_query.set_progress(processed_rows=len(df))
                 results.append(sub_data)
             except Exception as e:
                 if on_error == "skip":
@@ -175,17 +176,22 @@ class FetchDataframePartitionCall(BaseStepCall):
         # split into chunks and send to workers
         futures = []
         for df2 in split_data_frame(df, partition_size):
-            futures.append(executor.submit(self.exec_sub_steps, df2))
+            futures.append([executor.submit(self.exec_sub_steps, df2), len(df2)])
 
-        for future in futures:
+        error = None
+        for future, rows_count in futures:
             try:
                 results.append(future.result())
+                run_query.set_progress(processed_rows=rows_count)
             except Exception as e:
                 if on_error == "skip":
                     logger.error(e)
                 else:
                     executor.shutdown()
-                    raise e
+                    error = e
+
+        if error:
+            raise error
         if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
             executor.shutdown()
             raise RuntimeError("Query is interrupted")
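Aside: the worker loop now drains every future, recording progress per finished chunk, and re-raises only after the loop, so one failed partition no longer aborts progress accounting for the others. The pattern in isolation (`run_all` is hypothetical):

    from concurrent.futures import ThreadPoolExecutor

    def run_all(tasks, on_error="raise"):
        results, error, processed = [], None, 0
        with ThreadPoolExecutor() as executor:
            futures = [(executor.submit(fn), n) for fn, n in tasks]
            for future, rows_count in futures:
                try:
                    results.append(future.result())
                    processed += rows_count  # progress counted per finished chunk
                except Exception as e:
                    if on_error == "skip":
                        continue
                    error = e  # remember, but keep draining remaining futures
        if error:
            raise error  # surface the failure only after the loop
        return results, processed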
mindsdb/api/executor/sql_query/steps/subselect_step.py:

@@ -2,7 +2,15 @@ from collections import defaultdict
 
 import pandas as pd
 
-from mindsdb_sql_parser.ast import …
+from mindsdb_sql_parser.ast import (
+    Identifier,
+    Select,
+    Star,
+    Constant,
+    Function,
+    Variable,
+    BinaryOperation,
+)
 
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SERVER_VARIABLES
 from mindsdb.api.executor.planner.step_result import Result
@@ -52,13 +60,8 @@ class SubSelectStepCall(BaseStepCall):
 
         # inject previous step values
         if isinstance(query, Select):
-
-            def inject_values(node, **kwargs):
-                if isinstance(node, Parameter) and isinstance(node.value, Result):
-                    prev_result = self.steps_data[node.value.step_num]
-                    return Constant(prev_result.get_column_values(col_idx=0)[0])
-
-            query_traversal(query, inject_values)
+            fill_params = get_fill_param_fnc(self.steps_data)
+            query_traversal(query, fill_params)
 
         df = result.to_df()
         res = query_df(df, query, session=self.session)