MindsDB 25.7.3.0__py3-none-any.whl → 25.8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +215 -150
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +18 -44
- mindsdb/api/http/namespaces/agents.py +23 -20
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -77
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/salesforce_handler/constants.py +215 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +246 -149
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +11 -6
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +174 -23
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +33 -6
- mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +111 -145
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +5 -3
- mindsdb/utilities/fs.py +54 -17
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +282 -268
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +94 -92
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py (new file)

@@ -0,0 +1,73 @@
+from typing import List
+
+
+class TextSplitter:
+    def __init__(
+        self,
+        chunk_size: int = 1000,
+        chunk_overlap: int = 200,
+        separators: List[str] = None,
+        k_range: float = 0.5,
+        k_ratio: float = 1,
+    ):
+        """
+        Split text into chunks. The logic:
+        - Get a piece of text with chunk_size and try to find the separator at the end of the piece.
+        - The allowed range to find the separator is defined by k_range and k_ratio using formula:
+            k_range * chunk_size / (num * k_ratio + 1)
+            num - is number of a separator from the list
+        - if the separator is not in the range: switch to the next separator
+        - if the found separator is in the middle of the sentence, use overlapping:
+          - the found text is the current chunk
+          - repeat the search with less strict k_range and k_ratio
+          - the found text will be the beginning of the next chunk
+
+        :param chunk_size: size of the chunk, which must not be exceeded
+        :param separators: list of separators in order of priority
+        :param k_range: defines the range to look for the separator
+        :param k_ratio: defines how much to shrink the range for the next separator
+        """
+        if separators is None:
+            separators = ["\n\n", "\n", ". ", " ", ""]
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.separators = separators
+        self.k_range = k_range
+        self.k_ratio = k_ratio
+
+    def split_text(self, text: str) -> List[str]:
+        chunks = []
+
+        while True:
+            if len(text) < self.chunk_size:
+                chunks.append(text)
+                break
+
+            sep, chunk, shift = self.get_next_chunk(text, self.k_range, self.k_ratio)
+            chunks.append(chunk)
+
+            text = text[shift:]
+        return chunks
+
+    def get_next_chunk(self, text: str, k_range: float, k_ratio: float):
+        # returns chunk with separator and shift for the next search iteration
+
+        chunk = text[: self.chunk_size]
+        # positions = []
+        for i, sep in enumerate(self.separators):
+            pos = chunk.rfind(sep)
+
+            vpos = self.chunk_size - pos
+            if vpos < k_range * self.chunk_size / (i * k_ratio + 1):
+                shift = len(sep) + pos
+                if sep.strip(" ") == "":
+                    # overlapping
+                    sep2, _, shift2 = self.get_next_chunk(text, k_range * 1.5, 0)
+                    if sep2.strip(" ") != "":
+                        # use shift of previous separator
+                        if shift - shift2 < self.chunk_overlap:
+                            shift = shift2
+
+                return sep, chunk[:pos], shift
+
+        raise RuntimeError("Cannot split text")
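For orientation, a minimal sketch of how the new splitter might be exercised. The import path follows the file list above; the sample text, sizes and assertion are illustrative rather than part of the release:

    from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter

    # Illustrative document: short sentences grouped into paragraphs.
    text = ("MindsDB splits documents before indexing them. " * 12 + "\n\n") * 4

    splitter = TextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_text(text)

    # Every chunk stays within the configured budget; when a cut would land
    # mid-sentence, the next chunk restarts from an earlier separator.
    assert all(len(chunk) <= 300 for chunk in chunks)
    print(len(chunks), "chunks")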
mindsdb/interfaces/query_context/context_controller.py

@@ -7,9 +7,7 @@ import pandas as pd
 
 from mindsdb_sql_parser import Select, Star, OrderBy
 
-from mindsdb_sql_parser.ast import (
-    Identifier, BinaryOperation, Last, Constant, ASTNode
-)
+from mindsdb_sql_parser.ast import Identifier, BinaryOperation, Last, Constant, ASTNode
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from mindsdb.utilities.cache import get_cache
 
@@ -22,15 +20,15 @@ from .last_query import LastQuery
 
 class RunningQuery:
     """
-    …
+    Query in progress
    """
 
-    OBJECT_TYPE = 'query'
+    OBJECT_TYPE = "query"
 
     def __init__(self, record: db.Queries):
         self.record = record
         self.sql = record.sql
-        self.database = record.database or config.get('default_project')
+        self.database = record.database or config.get("default_project")
 
     def get_partitions(self, dn, step_call, query: Select) -> Iterable:
         """
@@ -41,22 +39,19 @@ class RunningQuery:
         :param query: AST query to execute
         :return: generator with query results
         """
-        if dn.has_support_stream():
+        if hasattr(dn, "has_support_stream") and dn.has_support_stream():
             query2 = self.get_partition_query(step_call.current_step_num, query, stream=True)
 
             for df in dn.query_stream(query2, fetch_size=self.batch_size):
                 max_track_value = self.get_max_track_value(df)
                 yield df
-                self.set_progress(…)
+                self.set_progress(max_track_value=max_track_value)
 
         else:
             while True:
                 query2 = self.get_partition_query(step_call.current_step_num, query, stream=False)
 
-                response = dn.query(
-                    query=query2,
-                    session=step_call.session
-                )
+                response = dn.query(query=query2, session=step_call.session)
                 df = response.data_frame
 
                 if df is None or len(df) == 0:
@@ -64,26 +59,26 @@ class RunningQuery:
 
                 max_track_value = self.get_max_track_value(df)
                 yield df
-                self.set_progress(…)
+                self.set_progress(max_track_value=max_track_value)
 
     def get_partition_query(self, step_num: int, query: Select, stream=False) -> Select:
         """
-        …
-        …
-        …
-        …
-        …
-        …
-        …
+        Generate query for fetching the next partition
+        It wraps query to
+            select * from ({query})
+            where {track_column} > {previous_value}
+            order by track_column
+            limit size {batch_size}
+        And fill track_column, previous_value, batch_size
 
-        …
-        …
-        …
-        …
-        …
+        If stream is true:
+        - if track_column is defined:
+            - don't add limit
+        - else:
+            - return user query without modifications
         """
 
-        track_column = self.record.parameters.get('track_column')
+        track_column = self.record.parameters.get("track_column")
         if track_column is None and stream:
             # if no track column for stream fetching: it is not resumable query, execute original query
 
@@ -93,31 +88,30 @@ class RunningQuery:
             return query
 
         if not stream and track_column is None:
-            raise ValueError('Track column is not defined')
+            raise ValueError("Track column is not defined")
 
         query = Select(
             targets=[Star()],
             from_table=query,
             order_by=[OrderBy(Identifier(track_column))],
-
         )
         if not stream:
             query.limit = Constant(self.batch_size)
 
-        track_value = self.record.context.get('track_value')
+        track_value = self.record.context.get("track_value")
         # is it different step?
-        cur_step_num = self.record.context.get('step_num')
+        cur_step_num = self.record.context.get("step_num")
         if cur_step_num is not None and cur_step_num != step_num:
             # reset track_value
             track_value = None
-            self.record.context['track_value'] = None
-            self.record.context['step_num'] = step_num
-            flag_modified(self.record, 'context')
+            self.record.context["track_value"] = None
+            self.record.context["step_num"] = step_num
+            flag_modified(self.record, "context")
             db.session.commit()
 
         if track_value is not None:
             query.where = BinaryOperation(
-                op='>',
+                op=">",
                 args=[Identifier(track_column), Constant(track_value)],
             )
 
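To make the wrapping concrete, here is a sketch of the AST that get_partition_query assembles, built with the same mindsdb_sql_parser constructors the hunks above import (parse_sql is assumed to be the parser's usual top-level entry point; the inner query, column and values are illustrative):

    from mindsdb_sql_parser import parse_sql, Select, Star, OrderBy
    from mindsdb_sql_parser.ast import Identifier, BinaryOperation, Constant

    inner = parse_sql("SELECT * FROM demo_db.sales")  # the user's original query

    # Wrap as in the hunk: order by the track column, page with a limit,
    # and on resume skip rows at or below the last seen track value.
    wrapper = Select(
        targets=[Star()],
        from_table=inner,
        order_by=[OrderBy(Identifier("order_id"))],  # track_column
    )
    wrapper.limit = Constant(1000)  # batch_size (non-stream mode)
    wrapper.where = BinaryOperation(op=">", args=[Identifier("order_id"), Constant(500)])  # previous track_value
    # Effectively: select * from (select * from demo_db.sales)
    #              where order_id > 500 order by order_id limit 1000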
@@ -126,24 +120,22 @@ class RunningQuery:
     def get_info(self):
         record = self.record
         return {
-            'id': record.id,
-            'sql': record.sql,
-            'database': record.database,
-            'started_at': record.started_at,
-            'finished_at': record.finished_at,
-            'parameters': record.parameters,
-            'context': record.context,
-            'processed_rows': record.processed_rows,
-            'error': record.error,
-            'updated_at': record.updated_at,
+            "id": record.id,
+            "sql": record.sql,
+            "database": record.database,
+            "started_at": record.started_at,
+            "finished_at": record.finished_at,
+            "parameters": record.parameters,
+            "context": record.context,
+            "processed_rows": record.processed_rows,
+            "error": record.error,
+            "updated_at": record.updated_at,
         }
 
     def add_to_task(self):
-
         task_record = db.Tasks(
             company_id=ctx.company_id,
             user_class=ctx.user_class,
-
             object_type=self.OBJECT_TYPE,
             object_id=self.record.id,
         )
@@ -163,59 +155,58 @@ class RunningQuery:
 
     def set_params(self, params: dict):
         """
-        …
+        Store parameters of the step which is about to be split into partitions
         """
 
-        if 'batch_size' not in params:
-            params['batch_size'] = 1000
+        if "batch_size" not in params:
+            params["batch_size"] = 1000
 
         self.record.parameters = params
-        self.batch_size = self.record.parameters['batch_size']
+        self.batch_size = self.record.parameters["batch_size"]
         db.session.commit()
 
     def get_max_track_value(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
         """
-        …
-        …
-        …
+        return max value to use in `set_progress`.
+        this function is called before execution substeps,
+        `set_progress` function - after
         """
-        if 'track_column' in self.record.parameters:
-            track_column = self.record.parameters['track_column']
+        if "track_column" in self.record.parameters:
+            track_column = self.record.parameters["track_column"]
             return df[track_column].max()
         else:
             # stream mode
             return None
 
-    def set_progress(self, …):
+    def set_progress(self, processed_rows: int = None, max_track_value: int = None):
         """
-        …
+        Store progress of the query, it is called after processing of batch
         """
 
-        if …
-        …
-        …
-        self.record.processed_rows = self.record.processed_rows + len(df)
-
-        cur_value = self.record.context.get('track_value')
-        new_value = max_track_value
-        if new_value is not None:
-            if cur_value is None or new_value > cur_value:
-                self.record.context['track_value'] = new_value
-                flag_modified(self.record, 'context')
+        if processed_rows is not None and processed_rows > 0:
+            self.record.processed_rows = self.record.processed_rows + processed_rows
+            db.session.commit()
 
-        …
+        if max_track_value is not None:
+            cur_value = self.record.context.get("track_value")
+            new_value = max_track_value
+            if new_value is not None:
+                if cur_value is None or new_value > cur_value:
+                    self.record.context["track_value"] = new_value
+                    flag_modified(self.record, "context")
+                    db.session.commit()
 
     def on_error(self, error: Exception, step_num: int, steps_data: dict):
         """
-        …
-        …
+        Saves error of the query in database
+        Also saves step data and current step num to be able to resume query
         """
         self.record.error = str(error)
-        self.record.context['step_num'] = step_num
-        flag_modified(self.record, 'context')
+        self.record.context["step_num"] = step_num
+        flag_modified(self.record, "context")
 
         # save steps_data
-        cache = get_cache('steps_data')
+        cache = get_cache("steps_data")
         data = pickle.dumps(steps_data, protocol=5)
         cache.set(str(self.record.id), data)
 
@@ -223,10 +214,10 @@ class RunningQuery:
 
     def mark_as_run(self):
         """
-        …
+        Mark query as running and reset error of the query
         """
         if self.record.finished_at is not None:
-            raise RuntimeError('The query already finished')
+            raise RuntimeError("The query already finished")
 
         if self.record.started_at is None:
             self.record.started_at = dt.datetime.now()
@@ -235,13 +226,13 @@ class RunningQuery:
             self.record.error = None
             db.session.commit()
         else:
-            raise RuntimeError('The query might be running already')
+            raise RuntimeError("The query might be running already")
 
     def get_state(self) -> dict:
         """
-        …
+        Returns stored state for resuming the query
         """
-        cache = get_cache('steps_data')
+        cache = get_cache("steps_data")
         key = self.record.id
         data = cache.get(key)
         cache.delete(key)
@@ -249,13 +240,13 @@ class RunningQuery:
         steps_data = pickle.loads(data)
 
         return {
-            'step_num': self.record.context.get('step_num'),
-            'steps_data': steps_data,
+            "step_num": self.record.context.get("step_num"),
+            "steps_data": steps_data,
         }
 
     def finish(self):
         """
-        …
+        Mark query as finished
         """
 
         self.record.finished_at = dt.datetime.now()
@@ -263,7 +254,7 @@ class RunningQuery:
 
 
 class QueryContextController:
-    IGNORE_CONTEXT = '<IGNORE>'
+    IGNORE_CONTEXT = "<IGNORE>"
 
     def handle_db_context_vars(self, query: ASTNode, dn, session) -> tuple:
         """
@@ -300,9 +291,9 @@ class QueryContextController:
             values = self._get_init_last_values(l_query, dn, session)
             if rec is None:
                 self.__add_context_record(context_name, query_str, values)
-                if context_name.startswith('job-if-'):
+                if context_name.startswith("job-if-"):
                     # add context for job also
-                    self.__add_context_record(context_name.replace('job-if', 'job'), query_str, values)
+                    self.__add_context_record(context_name.replace("job-if", "job"), query_str, values)
             else:
                 rec.values = values
         else:
@@ -319,20 +310,19 @@ class QueryContextController:
 
     def remove_lasts(self, query):
         def replace_lasts(node, **kwargs):
-
             # find last in where
             if isinstance(node, BinaryOperation):
                 if isinstance(node.args[0], Identifier) and isinstance(node.args[1], Last):
                     node.args = [Constant(0), Constant(0)]
-                    node.op = '='
+                    node.op = "="
 
         # find lasts
         query_traversal(query, replace_lasts)
         return query
 
-    def _result_callback(…
-        …
-        …
+    def _result_callback(
+        self, l_query: LastQuery, context_name: str, query_str: str, df: pd.DataFrame, columns_info: list
+    ):
         """
         This function handles result from executed query and updates context variables with new values
 
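The remove_lasts hunk above neutralises a LAST condition by rewriting it into a constant equality. A hedged sketch of the same traversal applied standalone, reusing only imports that appear in this file; the query text is illustrative and assumes the parser accepts LAST in this dialect:

    from mindsdb_sql_parser import parse_sql
    from mindsdb_sql_parser.ast import Identifier, BinaryOperation, Last, Constant
    from mindsdb.integrations.utilities.query_traversal import query_traversal

    query = parse_sql("SELECT * FROM demo_db.sales WHERE order_id > LAST")

    def replace_lasts(node, **kwargs):
        # same rewrite as the hunk: `col > LAST` becomes the no-op `0 = 0`
        if isinstance(node, BinaryOperation):
            if isinstance(node.args[0], Identifier) and isinstance(node.args[1], Last):
                node.args = [Constant(0), Constant(0)]
                node.op = "="

    query_traversal(query, replace_lasts)
    # query now renders as: SELECT * FROM demo_db.sales WHERE 0 = 0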
@@ -352,12 +342,12 @@ class QueryContextController:
         values = {}
         # get max values
         for info in l_query.get_last_columns():
-            target_idx = info['target_idx']
+            target_idx = info["target_idx"]
             if target_idx is not None:
                 # get by index
-                col_name = columns_info[target_idx]['name']
+                col_name = columns_info[target_idx]["name"]
             else:
-                col_name = info['column_name']
+                col_name = info["column_name"]
                 # get by name
             if col_name not in df:
                 continue
@@ -377,7 +367,7 @@ class QueryContextController:
                 continue
 
             if value is not None:
-                values[info['table_name']] = {info['column_name']: value}
+                values[info["table_name"]] = {info["column_name"]: value}
 
         self.__update_context_record(context_name, query_str, values)
 
@@ -389,10 +379,9 @@ class QueryContextController:
         """
 
         context_name = self.gen_context_name(object_type, object_id)
-        for rec in db.session.query(db.QueryContext).filter_by(
-            context_name=context_name,
-            company_id=ctx.company_id
-        ).all():
+        for rec in (
+            db.session.query(db.QueryContext).filter_by(context_name=context_name, company_id=ctx.company_id).all()
+        ):
             db.session.delete(rec)
         db.session.commit()
 
@@ -404,11 +393,7 @@ class QueryContextController:
         """
         last_values = {}
         for query, info in l_query.get_init_queries():
-
-            response = dn.query(
-                query=query,
-                session=session
-            )
+            response = dn.query(query=query, session=session)
             data = response.data_frame
             columns_info = response.columns
 
@@ -419,7 +404,7 @@ class QueryContextController:
 
             idx = None
             for i, col in enumerate(columns_info):
-                if col['name'].upper() == info['column_name'].upper():
+                if col["name"].upper() == info["column_name"].upper():
                     idx = i
                     break
 
@@ -429,7 +414,7 @@ class QueryContextController:
                 value = row[idx]
 
                 if value is not None:
-                    last_values[info['table_name']] = {info['column_name']: value}
+                    last_values[info["table_name"]] = {info["column_name"]: value}
 
         return last_values
 
@@ -446,7 +431,7 @@ class QueryContextController:
         if len(context_stack) > 0:
             return context_stack[-1]
         else:
-            return ''
+            return ""
 
     def set_context(self, object_type: str = None, object_id: int = None):
         """
@@ -482,9 +467,9 @@ class QueryContextController:
         """
 
         if object_type is None:
-            return ''
+            return ""
         if object_id is not None:
-            object_type += '-' + str(object_id)
+            object_type += "-" + str(object_id)
         return object_type
 
     def get_context_vars(self, object_type: str, object_id: int) -> List[dict]:
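As a quick illustration of the naming scheme gen_context_name implements (controller stands in for a QueryContextController instance; values are illustrative):

    controller.gen_context_name("job", 42)     # -> "job-42"
    controller.gen_context_name("job-if", 42)  # -> "job-if-42"; the handle_db_context_vars
                                               #    hunk above mirrors it to "job-42"
    controller.gen_context_name(None, None)    # -> ""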
@@ -495,10 +480,7 @@ class QueryContextController:
         """
         context_name = self.gen_context_name(object_type, object_id)
         vars = []
-        for rec in db.session.query(db.QueryContext).filter_by(
-            context_name=context_name,
-            company_id=ctx.company_id
-        ):
+        for rec in db.session.query(db.QueryContext).filter_by(context_name=context_name, company_id=ctx.company_id):
             if rec.values is not None:
                 vars.append(rec.values)
 
@@ -510,21 +492,17 @@ class QueryContextController:
         Find and return record for context and query string
         """
 
-        return db.session.query(db.QueryContext).filter_by(
-            query=query_str,
-            context_name=context_name,
-            company_id=ctx.company_id
-        ).first()
+        return (
+            db.session.query(db.QueryContext)
+            .filter_by(query=query_str, context_name=context_name, company_id=ctx.company_id)
+            .first()
+        )
 
     def __add_context_record(self, context_name: str, query_str: str, values: dict) -> db.QueryContext:
         """
         Creates record (for context and query string) with values and returns it
         """
-        rec = db.QueryContext(
-            query=query_str,
-            context_name=context_name,
-            company_id=ctx.company_id,
-            values=values)
+        rec = db.QueryContext(query=query_str, context_name=context_name, company_id=ctx.company_id, values=values)
         db.session.add(rec)
         return rec
 
@@ -538,27 +516,23 @@ class QueryContextController:
 
     def get_query(self, query_id: int) -> RunningQuery:
         """
-        …
+        Get running query by id
         """
 
-        rec = db.Queries.query.filter(
-            db.Queries.id == query_id,
-            db.Queries.company_id == ctx.company_id
-        ).first()
+        rec = db.Queries.query.filter(db.Queries.id == query_id, db.Queries.company_id == ctx.company_id).first()
 
         if rec is None:
-            raise RuntimeError(f'Query not found: {query_id}')
+            raise RuntimeError(f"Query not found: {query_id}")
         return RunningQuery(rec)
 
     def create_query(self, query: ASTNode, database: str = None) -> RunningQuery:
         """
-        …
+        Create a new running query from AST query
         """
 
         # remove old queries
         remove_query = db.session.query(db.Queries).filter(
-            db.Queries.company_id == ctx.company_id,
-            db.Queries.finished_at < (dt.datetime.now() - dt.timedelta(days=1))
+            db.Queries.company_id == ctx.company_id, db.Queries.finished_at < (dt.datetime.now() - dt.timedelta(days=1))
         )
         for rec in remove_query.all():
             self.get_query(rec.id).remove_from_task()
@@ -576,27 +550,19 @@ class QueryContextController:
 
     def list_queries(self) -> List[dict]:
         """
-        …
+        Get list of all running queries with metadata
         """
 
-        query = db.session.query(db.Queries).filter(
-            db.Queries.company_id == ctx.company_id
-        )
-        return [
-            RunningQuery(record).get_info()
-            for record in query
-        ]
+        query = db.session.query(db.Queries).filter(db.Queries.company_id == ctx.company_id)
+        return [RunningQuery(record).get_info() for record in query]
 
     def cancel_query(self, query_id: int):
         """
-        …
+        Cancels running query by id
         """
-        rec = db.Queries.query.filter(
-            db.Queries.id == query_id,
-            db.Queries.company_id == ctx.company_id
-        ).first()
+        rec = db.Queries.query.filter(db.Queries.id == query_id, db.Queries.company_id == ctx.company_id).first()
         if rec is None:
-            raise RuntimeError(f'Query not found: {query_id}')
+            raise RuntimeError(f"Query not found: {query_id}")
 
         self.get_query(rec.id).remove_from_task()
 