mindsdb-25.7.2.0-py3-none-any.whl → mindsdb-25.7.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +1 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +213 -137
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/http/initialize.py +16 -43
- mindsdb/api/http/namespaces/agents.py +24 -21
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -76
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +16 -3
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
- mindsdb/integrations/handlers/salesforce_handler/constants.py +208 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +142 -81
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -4
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/interfaces/agents/agents_controller.py +169 -110
- mindsdb/interfaces/agents/langchain_agent.py +10 -3
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -8
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +63 -16
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +33 -5
- mindsdb/interfaces/knowledge_base/evaluate.py +53 -9
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
- mindsdb/interfaces/query_context/context_controller.py +100 -133
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +3 -3
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/METADATA +262 -263
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/RECORD +69 -68
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.2.0.dist-info → mindsdb-25.7.4.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/knowledge_base/executor.py
@@ -217,6 +217,17 @@ class KnowledgeBaseQueryExecutor:
                     f'Operator "{content_condition.op}" is not supported for condition: {content_condition}'
                 )
 
+    @staticmethod
+    def to_include_content(content_condition: BinaryOperation) -> List[str]:
+        """
+        Handles positive conditions for content. Returns list of content values
+        """
+        if content_condition.op == "IN":
+            return [item.value for item in content_condition.args[1].items]
+
+        elif content_condition.op in ("=", "LIKE"):
+            return [content_condition.args[1].value]
+
     def to_excluded_ids(
         self, content_condition: BinaryOperation, other_conditions: List[BinaryOperation]
     ) -> Optional[List[str]]:
@@ -290,11 +301,17 @@ class KnowledgeBaseQueryExecutor:
         if len(content_filters) > 0:
             content_filters2 = []
             exclude_ids = set()
+            include_contents = set()
             # exclude content conditions
             for condition in content_filters:
                 ids = self.to_excluded_ids(condition, other_filters)
                 if ids is not None:
                     exclude_ids.update(ids)
+                    continue
+                contents = self.to_include_content(condition)
+                if contents is not None:
+                    include_contents.update(contents)
+                    continue
                 else:
                     # keep origin content filter
                     content_filters2.append(condition)
@@ -305,6 +322,13 @@ class KnowledgeBaseQueryExecutor:
             condition = BinaryOperation(op="NOT IN", args=[Identifier(self.id_column), Tuple(values)])
             other_filters.append(condition)
         # execute content filters
+        if include_contents:
+            content = " AND ".join(include_contents)
+            result = self.execute_content_condition(
+                BinaryOperation(op="=", args=[Identifier(self.content_column), Constant(content)]),
+                other_filters,
+            )
+            results.append(result)
         for condition in content_filters2:
             result = self.execute_content_condition(condition, other_filters)
             results.append(result)
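
Taken together, the three hunks above collect positive content conditions (`=`, `LIKE`, `IN`) into an `include_contents` set and issue one merged search instead of one query per condition. A standalone sketch of that merge, using plain (op, values) tuples as hypothetical stand-ins for the parser's BinaryOperation nodes:

# Standalone sketch (not MindsDB code): collapse positive content conditions
# into one search string, the way the hunks above do with " AND ".join(...).
from typing import List, Optional, Tuple

# (op, values) pairs stand in for parsed BinaryOperation nodes.
Condition = Tuple[str, List[str]]

def to_include_content(cond: Condition) -> Optional[List[str]]:
    op, values = cond
    if op == "IN":
        return values
    elif op in ("=", "LIKE"):
        return values[:1]
    return None  # negative ops are handled by the exclusion path

conditions = [("=", ["error rate"]), ("LIKE", ["timeout"])]
include_contents = set()
for cond in conditions:
    contents = to_include_content(cond)
    if contents is not None:
        include_contents.update(contents)

# One merged semantic-search condition instead of two separate queries:
print(" AND ".join(sorted(include_contents)))  # -> "error rate AND timeout"

Joining with " AND " keeps a single search call, which is why the last hunk builds one `=` condition over `self.content_column` rather than looping.
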
mindsdb/interfaces/knowledge_base/llm_client.py
@@ -54,12 +54,12 @@ class LLMClient:
 
         self.client = module.Handler
 
-    def completion(self, messages: List[dict]) -> str:
+    def completion(self, messages: List[dict], json_output: bool = False) -> str:
         """
         Call LLM completion and get response
         """
         params = self.params
-
+        params["json_output"] = json_output
         if self.provider in ("azure_openai", "openai"):
             response = self.client.chat.completions.create(
                 model=params["model_name"],
@@ -69,6 +69,6 @@ class LLMClient:
         else:
             kwargs = params.copy()
             model = kwargs.pop("model_name")
-
+            kwargs.pop("provider", None)
             response = self.client.completion(self.provider, model=model, messages=messages, args=kwargs)
         return response.choices[0].message.content
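
This change threads a `json_output` flag into `params` before dispatching to the provider. How a handler ultimately honors it is not shown in this diff; for OpenAI-compatible APIs a plausible mapping is the `response_format` option, sketched below with hypothetical helper names:

# Illustrative sketch of honoring a json_output flag with an OpenAI-style
# client; build_completion_kwargs is an assumption, not MindsDB's actual code.
def build_completion_kwargs(params: dict, messages: list) -> dict:
    kwargs = {"model": params["model_name"], "messages": messages}
    if params.get("json_output"):
        # OpenAI's JSON mode: the model is constrained to emit valid JSON.
        kwargs["response_format"] = {"type": "json_object"}
    return kwargs

kwargs = build_completion_kwargs(
    {"model_name": "gpt-4o-mini", "json_output": True},
    [{"role": "user", "content": "Return {\"ok\": true}"}],
)
print(kwargs["response_format"])  # -> {'type': 'json_object'}
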
mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py
@@ -1,16 +1,17 @@
+import re
+import html
+import asyncio
 from typing import List, Dict, Optional, Any
+
 import pandas as pd
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-import asyncio
-
+from langchain_core.documents import Document as LangchainDocument
 
 from mindsdb.integrations.utilities.rag.splitters.file_splitter import (
     FileSplitter,
     FileSplitterConfig,
 )
-
 from mindsdb.interfaces.agents.langchain_agent import create_chat_model
-
 from mindsdb.interfaces.knowledge_base.preprocessing.models import (
     PreprocessingConfig,
     ProcessedChunk,
@@ -21,7 +22,6 @@ from mindsdb.interfaces.knowledge_base.preprocessing.models import (
 )
 from mindsdb.utilities import log
 
-from langchain_core.documents import Document as LangchainDocument
 
 logger = log.getLogger(__name__)
 
@@ -123,11 +123,11 @@ class ContextualPreprocessor(DocumentPreprocessor):
 
     DEFAULT_CONTEXT_TEMPLATE = """
 <document>
-{
+{WHOLE_DOCUMENT}
 </document>
 Here is the chunk we want to situate within the whole document
 <chunk>
-{
+{CHUNK_CONTENT}
 </chunk>
 Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else."""
 
@@ -149,12 +149,20 @@ Please give a short succinct context to situate this chunk within the overall do
         self.summarize = self.config.summarize
 
     def _prepare_prompts(self, chunk_contents: list[str], full_documents: list[str]) -> list[str]:
-
-
-
-
-
-
+        def tag_replacer(match):
+            tag = match.group(0)
+            if tag.lower() not in ["<document>", "</document>", "<chunk>", "</chunk>"]:
+                return tag
+            return html.escape(tag)
+
+        tag_pattern = r"</?document>|</?chunk>"
+        prompts = []
+        for chunk_content, full_document in zip(chunk_contents, full_documents):
+            chunk_content = re.sub(tag_pattern, tag_replacer, chunk_content, flags=re.IGNORECASE)
+            full_document = re.sub(tag_pattern, tag_replacer, full_document, flags=re.IGNORECASE)
+            prompts.append(
+                self.DEFAULT_CONTEXT_TEMPLATE.format(WHOLE_DOCUMENT=full_document, CHUNK_CONTENT=chunk_content)
+            )
 
         return prompts
 
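
The rewritten `_prepare_prompts` HTML-escapes literal `<document>`/`<chunk>` tags found in user content, so chunk text cannot break out of the prompt template's delimiters. A minimal runnable sketch of that escaping:

# Minimal sketch of the tag escaping added above: only the template's own
# delimiter tags are HTML-escaped; all other text passes through unchanged.
import html
import re

tag_pattern = r"</?document>|</?chunk>"

def tag_replacer(match: re.Match) -> str:
    tag = match.group(0)
    if tag.lower() not in ["<document>", "</document>", "<chunk>", "</chunk>"]:
        return tag
    return html.escape(tag)

chunk = "See </chunk> injection and <b>normal</b> markup"
print(re.sub(tag_pattern, tag_replacer, chunk, flags=re.IGNORECASE))
# -> "See &lt;/chunk&gt; injection and <b>normal</b> markup"
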
mindsdb/interfaces/query_context/context_controller.py
@@ -7,9 +7,7 @@ import pandas as pd
 
 from mindsdb_sql_parser import Select, Star, OrderBy
 
-from mindsdb_sql_parser.ast import (
-    Identifier, BinaryOperation, Last, Constant, ASTNode
-)
+from mindsdb_sql_parser.ast import Identifier, BinaryOperation, Last, Constant, ASTNode
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from mindsdb.utilities.cache import get_cache
 
@@ -22,15 +20,15 @@ from .last_query import LastQuery
 
 class RunningQuery:
     """
-
+    Query in progres
     """
 
-    OBJECT_TYPE =
+    OBJECT_TYPE = "query"
 
     def __init__(self, record: db.Queries):
         self.record = record
         self.sql = record.sql
-        self.database = record.database or config.get(
+        self.database = record.database or config.get("default_project")
 
     def get_partitions(self, dn, step_call, query: Select) -> Iterable:
         """
@@ -41,7 +39,7 @@ class RunningQuery:
         :param query: AST query to execute
         :return: generator with query results
         """
-        if dn.has_support_stream():
+        if hasattr(dn, "has_support_stream") and dn.has_support_stream():
             query2 = self.get_partition_query(step_call.current_step_num, query, stream=True)
 
             for df in dn.query_stream(query2, fetch_size=self.batch_size):
@@ -53,10 +51,7 @@ class RunningQuery:
         while True:
             query2 = self.get_partition_query(step_call.current_step_num, query, stream=False)
 
-            response = dn.query(
-                query=query2,
-                session=step_call.session
-            )
+            response = dn.query(query=query2, session=step_call.session)
             df = response.data_frame
 
             if df is None or len(df) == 0:
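
The `hasattr` guard added above keeps the streaming path safe for datanodes that predate `has_support_stream`. A tiny illustration of the capability probe (both classes are hypothetical):

# Hypothetical datanodes illustrating the capability probe added above:
# only objects that actually define has_support_stream() take the stream path.
class LegacyDataNode:
    pass  # no has_support_stream attribute at all

class StreamingDataNode:
    def has_support_stream(self) -> bool:
        return True

for dn in (LegacyDataNode(), StreamingDataNode()):
    streaming = hasattr(dn, "has_support_stream") and dn.has_support_stream()
    print(type(dn).__name__, "streams" if streaming else "falls back to batches")
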
@@ -68,22 +63,22 @@ class RunningQuery:
 
     def get_partition_query(self, step_num: int, query: Select, stream=False) -> Select:
         """
-
-
-
-
-
-
-
+        Generate query for fetching the next partition
+        It wraps query to
+            select * from ({query})
+            where {track_column} > {previous_value}
+            order by track_column
+            limit size {batch_size}
+        And fill track_column, previous_value, batch_size
 
-
-
-
-
-
+        If steam is true:
+        - if track_column is defined:
+            - don't add limit
+        - else:
+            - return user query without modifications
         """
 
-        track_column = self.record.parameters.get(
+        track_column = self.record.parameters.get("track_column")
         if track_column is None and stream:
             # if no track column for stream fetching: it is not resumable query, execute original query
@@ -93,31 +88,30 @@ class RunningQuery:
             return query
 
         if not stream and track_column is None:
-            raise ValueError(
+            raise ValueError("Track column is not defined")
 
         query = Select(
             targets=[Star()],
             from_table=query,
             order_by=[OrderBy(Identifier(track_column))],
-
         )
         if not stream:
             query.limit = Constant(self.batch_size)
 
-        track_value = self.record.context.get(
+        track_value = self.record.context.get("track_value")
         # is it different step?
-        cur_step_num = self.record.context.get(
+        cur_step_num = self.record.context.get("step_num")
         if cur_step_num is not None and cur_step_num != step_num:
             # reset track_value
             track_value = None
-            self.record.context[
-            self.record.context[
-            flag_modified(self.record,
+            self.record.context["track_value"] = None
+            self.record.context["step_num"] = step_num
+            flag_modified(self.record, "context")
             db.session.commit()
 
         if track_value is not None:
             query.where = BinaryOperation(
-                op=
+                op=">",
                 args=[Identifier(track_column), Constant(track_value)],
             )
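
The restored docstring describes keyset pagination: each partition re-issues the user query wrapped in an outer SELECT that filters on the last seen `track_column` value, ordered and limited to `batch_size`. A self-contained sketch of the same loop over an in-memory list (no MindsDB APIs involved):

# Self-contained sketch of the keyset pagination described above: fetch the
# next batch of rows whose track column exceeds the last value already seen.
rows = [{"id": i, "payload": f"row-{i}"} for i in range(1, 11)]

def fetch_partition(track_value, batch_size=4):
    # Equivalent of: SELECT * FROM (<user query>) WHERE id > :track_value
    #                ORDER BY id LIMIT :batch_size
    matched = [r for r in rows if track_value is None or r["id"] > track_value]
    return sorted(matched, key=lambda r: r["id"])[:batch_size]

track_value = None
while True:
    batch = fetch_partition(track_value)
    if not batch:
        break
    track_value = max(r["id"] for r in batch)  # persisted as "track_value"
    print([r["id"] for r in batch])
# -> [1, 2, 3, 4] then [5, 6, 7, 8] then [9, 10]
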
@@ -126,24 +120,22 @@ class RunningQuery:
     def get_info(self):
         record = self.record
         return {
-
-
-
-
-
-
-
-
-
-
+            "id": record.id,
+            "sql": record.sql,
+            "database": record.database,
+            "started_at": record.started_at,
+            "finished_at": record.finished_at,
+            "parameters": record.parameters,
+            "context": record.context,
+            "processed_rows": record.processed_rows,
+            "error": record.error,
+            "updated_at": record.updated_at,
         }
 
     def add_to_task(self):
-
         task_record = db.Tasks(
             company_id=ctx.company_id,
             user_class=ctx.user_class,
-
             object_type=self.OBJECT_TYPE,
             object_id=self.record.id,
         )
@@ -163,24 +155,24 @@ class RunningQuery:
 
     def set_params(self, params: dict):
         """
-
+        Store parameters of the step which is about to be split into partitions
         """
 
-        if
-            params[
+        if "batch_size" not in params:
+            params["batch_size"] = 1000
 
         self.record.parameters = params
-        self.batch_size = self.record.parameters[
+        self.batch_size = self.record.parameters["batch_size"]
         db.session.commit()
 
     def get_max_track_value(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
         """
-
-
-
+        return max value to use in `set_progress`.
+        this function is called before execution substeps,
+        `set_progress` function - after
         """
-        if
-            track_column = self.record.parameters[
+        if "track_column" in self.record.parameters:
+            track_column = self.record.parameters["track_column"]
             return df[track_column].max()
         else:
             # stream mode
@@ -188,7 +180,7 @@ class RunningQuery:
 
     def set_progress(self, df: pd.DataFrame, max_track_value: int):
         """
-
+        Store progres of the query, it is called after processing of batch
         """
 
         if len(df) == 0:
@@ -196,26 +188,26 @@ class RunningQuery:
 
         self.record.processed_rows = self.record.processed_rows + len(df)
 
-        cur_value = self.record.context.get(
+        cur_value = self.record.context.get("track_value")
         new_value = max_track_value
         if new_value is not None:
             if cur_value is None or new_value > cur_value:
-                self.record.context[
-                flag_modified(self.record,
+                self.record.context["track_value"] = new_value
+                flag_modified(self.record, "context")
 
         db.session.commit()
 
     def on_error(self, error: Exception, step_num: int, steps_data: dict):
         """
-
-
+        Saves error of the query in database
+        Also saves step data and current step num to be able to resume query
        """
         self.record.error = str(error)
-        self.record.context[
-        flag_modified(self.record,
+        self.record.context["step_num"] = step_num
+        flag_modified(self.record, "context")
 
         # save steps_data
-        cache = get_cache(
+        cache = get_cache("steps_data")
         data = pickle.dumps(steps_data, protocol=5)
         cache.set(str(self.record.id), data)
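
`on_error` checkpoints in-flight step data with pickle protocol 5 under the query's id, which is what `get_state` (below) later loads and deletes to resume the query. A standalone round-trip sketch; `DictCache` is a hypothetical in-memory stand-in for MindsDB's `get_cache("steps_data")`:

# Standalone sketch of the checkpoint/resume idea above; DictCache is a
# hypothetical in-memory stand-in for the real cache backend.
import pickle

class DictCache:
    def __init__(self):
        self._data = {}
    def set(self, key, value):
        self._data[key] = value
    def get(self, key):
        return self._data.get(key)
    def delete(self, key):
        self._data.pop(key, None)

cache = DictCache()
steps_data = {3: ["partial", "results"]}  # data produced before the failure
cache.set("42", pickle.dumps(steps_data, protocol=5))

# Later, on resume (compare get_state below): load once, then drop the key.
restored = pickle.loads(cache.get("42"))
cache.delete("42")
assert restored == steps_data
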
@@ -223,10 +215,10 @@ class RunningQuery:
 
     def mark_as_run(self):
         """
-
+        Mark query as running and reset error of the query
         """
         if self.record.finished_at is not None:
-            raise RuntimeError(
+            raise RuntimeError("The query already finished")
 
         if self.record.started_at is None:
             self.record.started_at = dt.datetime.now()
@@ -235,13 +227,13 @@ class RunningQuery:
             self.record.error = None
             db.session.commit()
         else:
-            raise RuntimeError(
+            raise RuntimeError("The query might be running already")
 
     def get_state(self) -> dict:
         """
-
+        Returns stored state for resuming the query
         """
-        cache = get_cache(
+        cache = get_cache("steps_data")
         key = self.record.id
         data = cache.get(key)
         cache.delete(key)
@@ -249,13 +241,13 @@ class RunningQuery:
         steps_data = pickle.loads(data)
 
         return {
-
-
+            "step_num": self.record.context.get("step_num"),
+            "steps_data": steps_data,
         }
 
     def finish(self):
         """
-
+        Mark query as finished
         """
 
         self.record.finished_at = dt.datetime.now()
@@ -263,7 +255,7 @@ class RunningQuery:
 
 
 class QueryContextController:
-    IGNORE_CONTEXT =
+    IGNORE_CONTEXT = "<IGNORE>"
 
     def handle_db_context_vars(self, query: ASTNode, dn, session) -> tuple:
         """
@@ -300,9 +292,9 @@ class QueryContextController:
             values = self._get_init_last_values(l_query, dn, session)
             if rec is None:
                 self.__add_context_record(context_name, query_str, values)
-                if context_name.startswith(
+                if context_name.startswith("job-if-"):
                     # add context for job also
-                    self.__add_context_record(context_name.replace(
+                    self.__add_context_record(context_name.replace("job-if", "job"), query_str, values)
             else:
                 rec.values = values
         else:
@@ -319,20 +311,19 @@ class QueryContextController:
 
     def remove_lasts(self, query):
        def replace_lasts(node, **kwargs):
-
             # find last in where
             if isinstance(node, BinaryOperation):
                 if isinstance(node.args[0], Identifier) and isinstance(node.args[1], Last):
                     node.args = [Constant(0), Constant(0)]
-                    node.op =
+                    node.op = "="
 
         # find lasts
         query_traversal(query, replace_lasts)
         return query
 
-    def _result_callback(
-
-
+    def _result_callback(
+        self, l_query: LastQuery, context_name: str, query_str: str, df: pd.DataFrame, columns_info: list
+    ):
         """
         This function handlers result from executed query and updates context variables with new values
 
@@ -352,12 +343,12 @@ class QueryContextController:
         values = {}
         # get max values
         for info in l_query.get_last_columns():
-            target_idx = info[
+            target_idx = info["target_idx"]
             if target_idx is not None:
                 # get by index
-                col_name = columns_info[target_idx][
+                col_name = columns_info[target_idx]["name"]
             else:
-                col_name = info[
+                col_name = info["column_name"]
                 # get by name
             if col_name not in df:
                 continue
@@ -377,7 +368,7 @@ class QueryContextController:
                 continue
 
             if value is not None:
-                values[info[
+                values[info["table_name"]] = {info["column_name"]: value}
 
         self.__update_context_record(context_name, query_str, values)
 
@@ -389,10 +380,9 @@ class QueryContextController:
         """
 
         context_name = self.gen_context_name(object_type, object_id)
-        for rec in
-            context_name=context_name,
-
-        ).all():
+        for rec in (
+            db.session.query(db.QueryContext).filter_by(context_name=context_name, company_id=ctx.company_id).all()
+        ):
             db.session.delete(rec)
         db.session.commit()
 
@@ -404,11 +394,7 @@ class QueryContextController:
         """
         last_values = {}
         for query, info in l_query.get_init_queries():
-
-            response = dn.query(
-                query=query,
-                session=session
-            )
+            response = dn.query(query=query, session=session)
             data = response.data_frame
             columns_info = response.columns
 
@@ -419,7 +405,7 @@ class QueryContextController:
 
             idx = None
             for i, col in enumerate(columns_info):
-                if col[
+                if col["name"].upper() == info["column_name"].upper():
                     idx = i
                     break
 
@@ -429,7 +415,7 @@ class QueryContextController:
                 value = row[idx]
 
                 if value is not None:
-                    last_values[info[
+                    last_values[info["table_name"]] = {info["column_name"]: value}
 
         return last_values
 
@@ -446,7 +432,7 @@ class QueryContextController:
         if len(context_stack) > 0:
             return context_stack[-1]
         else:
-            return
+            return ""
 
     def set_context(self, object_type: str = None, object_id: int = None):
         """
@@ -482,9 +468,9 @@ class QueryContextController:
         """
 
         if object_type is None:
-            return
+            return ""
         if object_id is not None:
-            object_type +=
+            object_type += "-" + str(object_id)
         return object_type
 
     def get_context_vars(self, object_type: str, object_id: int) -> List[dict]:
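
`gen_context_name` therefore yields keys like `job-7`, or `job` when no id is given, or an empty string when there is no object type. A standalone copy of the composition for quick reference:

# Sketch of the context-name composition shown above (standalone copy).
def gen_context_name(object_type=None, object_id=None):
    if object_type is None:
        return ""
    if object_id is not None:
        object_type += "-" + str(object_id)
    return object_type

print(gen_context_name("job", 7))  # -> "job-7"
print(gen_context_name("job"))     # -> "job"
print(gen_context_name())          # -> ""
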
@@ -495,10 +481,7 @@ class QueryContextController:
         """
         context_name = self.gen_context_name(object_type, object_id)
         vars = []
-        for rec in db.session.query(db.QueryContext).filter_by(
-            context_name=context_name,
-            company_id=ctx.company_id
-        ):
+        for rec in db.session.query(db.QueryContext).filter_by(context_name=context_name, company_id=ctx.company_id):
             if rec.values is not None:
                 vars.append(rec.values)
 
@@ -510,21 +493,17 @@ class QueryContextController:
         Find and return record for context and query string
         """
 
-        return
-            query
-            context_name=context_name,
-
-        )
+        return (
+            db.session.query(db.QueryContext)
+            .filter_by(query=query_str, context_name=context_name, company_id=ctx.company_id)
+            .first()
+        )
 
     def __add_context_record(self, context_name: str, query_str: str, values: dict) -> db.QueryContext:
         """
         Creates record (for context and query string) with values and returns it
         """
-        rec = db.QueryContext(
-            query=query_str,
-            context_name=context_name,
-            company_id=ctx.company_id,
-            values=values)
+        rec = db.QueryContext(query=query_str, context_name=context_name, company_id=ctx.company_id, values=values)
         db.session.add(rec)
         return rec
@@ -538,27 +517,23 @@ class QueryContextController:
 
     def get_query(self, query_id: int) -> RunningQuery:
         """
-
+        Get running query by id
         """
 
-        rec = db.Queries.query.filter(
-            db.Queries.id == query_id,
-            db.Queries.company_id == ctx.company_id
-        ).first()
+        rec = db.Queries.query.filter(db.Queries.id == query_id, db.Queries.company_id == ctx.company_id).first()
 
         if rec is None:
-            raise RuntimeError(f
+            raise RuntimeError(f"Query not found: {query_id}")
         return RunningQuery(rec)
 
     def create_query(self, query: ASTNode, database: str = None) -> RunningQuery:
         """
-
+        Create a new running query from AST query
         """
 
         # remove old queries
         remove_query = db.session.query(db.Queries).filter(
-            db.Queries.company_id == ctx.company_id,
-            db.Queries.finished_at < (dt.datetime.now() - dt.timedelta(days=1))
+            db.Queries.company_id == ctx.company_id, db.Queries.finished_at < (dt.datetime.now() - dt.timedelta(days=1))
         )
         for rec in remove_query.all():
             self.get_query(rec.id).remove_from_task()
@@ -576,27 +551,19 @@ class QueryContextController:
 
     def list_queries(self) -> List[dict]:
         """
-
+        Get list of all running queries with metadata
         """
 
-        query = db.session.query(db.Queries).filter(
-
-        )
-        return [
-            RunningQuery(record).get_info()
-            for record in query
-        ]
+        query = db.session.query(db.Queries).filter(db.Queries.company_id == ctx.company_id)
+        return [RunningQuery(record).get_info() for record in query]
 
     def cancel_query(self, query_id: int):
         """
-
+        Cancels running query by id
         """
-        rec = db.Queries.query.filter(
-            db.Queries.id == query_id,
-            db.Queries.company_id == ctx.company_id
-        ).first()
+        rec = db.Queries.query.filter(db.Queries.id == query_id, db.Queries.company_id == ctx.company_id).first()
         if rec is None:
-            raise RuntimeError(f
+            raise RuntimeError(f"Query not found: {query_id}")
 
         self.get_query(rec.id).remove_from_task()