MindsDB 25.1.2.1__py3-none-any.whl → 25.1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/METADATA +246 -255
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/RECORD +94 -83
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +5 -3
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +8 -3
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +9 -26
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/result_set.py +36 -21
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/join_step.py +4 -4
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/executor/utilities/sql.py +2 -10
- mindsdb/api/http/namespaces/agents.py +3 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +7 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +1 -1
- mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_embedding_handler/fastapi_embeddings.py +82 -0
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +8 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +50 -16
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +12 -6
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +2 -2
- mindsdb/integrations/utilities/files/__init__.py +0 -0
- mindsdb/integrations/utilities/files/file_reader.py +258 -0
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
- mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +76 -27
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +18 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +74 -21
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +108 -78
- mindsdb/integrations/utilities/rag/settings.py +37 -16
- mindsdb/integrations/utilities/sql_utils.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +18 -8
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/langchain_agent.py +124 -157
- mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +21 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/integrations.py +5 -1
- mindsdb/interfaces/database/projects.py +55 -16
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +39 -15
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +51 -40
- mindsdb/interfaces/skills/retrieval_tool.py +10 -3
- mindsdb/interfaces/skills/skill_tool.py +97 -54
- mindsdb/interfaces/skills/skills_controller.py +7 -3
- mindsdb/interfaces/skills/sql_agent.py +127 -41
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/cache.py +7 -4
- mindsdb/utilities/context.py +11 -1
- mindsdb/utilities/langfuse.py +279 -0
- mindsdb/utilities/log.py +20 -2
- mindsdb/utilities/otel/__init__.py +206 -0
- mindsdb/utilities/otel/logger.py +25 -0
- mindsdb/utilities/otel/meter.py +19 -0
- mindsdb/utilities/otel/metric_handlers/__init__.py +25 -0
- mindsdb/utilities/otel/tracer.py +16 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/utils.py +34 -0
- mindsdb/utilities/otel.py +0 -72
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.2.1.dist-info → MindsDB-25.1.5.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/datahub/datanodes/project_datanode.py
@@ -14,9 +14,7 @@ from mindsdb_sql_parser.ast import (
 from mindsdb.utilities.exception import EntityNotExistsError
 from mindsdb.api.executor.datahub.datanodes.datanode import DataNode
 from mindsdb.api.executor.datahub.classes.tables_row import TablesRow
-from mindsdb.
-from mindsdb.api.executor.utilities.sql import query_df
-from mindsdb.interfaces.query_context.context_controller import query_context_controller
+from mindsdb.utilities.partitioning import process_dataframe_in_partitions


 class ProjectDataNode(DataNode):
@@ -51,7 +49,7 @@ class ProjectDataNode(DataNode):
         tables = self.project.get_tables()
         return table_name in tables

-    def get_table_columns(self, table_name):
+    def get_table_columns(self, table_name, schema_name=None):
         return [
             {'name': name}
             for name in self.project.get_columns(table_name)
@@ -65,6 +63,12 @@ class ProjectDataNode(DataNode):
         if model_metadata['update_status'] == 'available':
             raise Exception(f"model '{model_name}' is obsolete and needs to be updated. Run 'RETRAIN {model_name};'")
         ml_handler = self.integration_controller.get_ml_handler(model_metadata['engine_name'])
+        if params is not None and 'partition_size' in params:
+            def callback(chunk):
+                return ml_handler.predict(model_name, chunk, project_name=self.project.name,
+                                          version=version, params=params)
+            return pd.concat(process_dataframe_in_partitions(df, callback, params['partition_size']))
+
         return ml_handler.predict(model_name, df, project_name=self.project.name, version=version, params=params)

     def query(self, query=None, native_query=None, session=None):
@@ -115,28 +119,7 @@ class ProjectDataNode(DataNode):

         if self.project.get_view(query_table):
             # this is the view
-
-            view_meta = self.project.query_view(query)
-
-            query_context_controller.set_context('view', view_meta['id'])
-
-            try:
-                sqlquery = SQLQuery(
-                    view_meta['query_ast'],
-                    session=session
-                )
-                result = sqlquery.fetch(view='dataframe')
-
-            finally:
-                query_context_controller.release_context('view', view_meta['id'])
-
-            if result['success'] is False:
-                raise Exception(f"Cant execute view query: {view_meta['query_ast']}")
-            df = result['result']
-            # remove duplicated columns
-            df = df.loc[:, ~df.columns.duplicated()]
-
-            df = query_df(df, query, session=session)
+            df = self.project.query_view(query, session)

             columns_info = [
                 {
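Note: both the predict() hunk above and the map_reduce_step.py hunk further down call process_dataframe_in_partitions from the newly added mindsdb/utilities/partitioning.py (+52 lines), whose body is not part of this diff. Below is only a minimal sketch of the call contract inferred from those two call sites; the function name and signature come from the diff, while the body (including any threading the real helper may do) is an assumption.

    from typing import Callable, Iterator

    import pandas as pd


    def process_dataframe_in_partitions(
        df: pd.DataFrame,
        callback: Callable[[pd.DataFrame], object],
        partition_size: int,
    ) -> Iterator[object]:
        # Sketch only: slice the dataframe into consecutive row chunks and
        # yield the callback result for each chunk, matching how the call
        # sites either iterate the results or pd.concat() them.
        for start in range(0, len(df), partition_size):
            yield callback(df.iloc[start:start + partition_size])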
mindsdb/api/executor/sql_query/__init__.py
@@ -0,0 +1 @@
+from .sql_query import SQLQuery
mindsdb/api/executor/sql_query/result_set.py
@@ -1,5 +1,6 @@
-from typing import List
 import copy
+from typing import List, Optional
+
 import numpy as np
 import pandas as pd

@@ -35,6 +36,19 @@ class Column:
         return f'{self.__class__.__name__}({self.__dict__})'


+def rename_df_columns(df: pd.DataFrame, names: Optional[List] = None) -> None:
+    """Inplace rename of dataframe columns
+
+    Args:
+        df (pd.DataFrame): dataframe
+        names (Optional[List]): columns names to set
+    """
+    if names is not None:
+        df.columns = names
+    else:
+        df.columns = list(range(len(df.columns)))
+
+
 class ResultSet:
     def __init__(self, columns=None, values: List[List] = None, df: pd.DataFrame = None):
         '''
@@ -73,20 +87,19 @@ class ResultSet:
     # --- converters ---

     def from_df(self, df, database=None, table_name=None, table_alias=None):
-
-
-
-        for i, col in enumerate(df.columns):
-            self._columns.append(Column(
-                name=col,
+        self._columns = [
+            Column(
+                name=column_name,
                 table_name=table_name,
                 table_alias=table_alias,
                 database=database,
-                type=
-            )
+                type=column_dtype
+            ) for column_name, column_dtype
+            in zip(df.columns, df.dtypes)
+        ]

-
-        self._df = df
+        rename_df_columns(df)
+        self._df = df

         return self

@@ -97,9 +110,6 @@ class ResultSet:
             if col.alias is not None:
                 alias_idx[col.alias] = col

-        # resp_dict = df.to_dict(orient='split')
-        # self._records = resp_dict['data']
-
         for col in df.columns:
             if col in col_names or strict:
                 column = col_names[col]
@@ -109,13 +119,16 @@ class ResultSet:
                 column = Column(col)
             self._columns.append(column)

-
+        rename_df_columns(df)
+        self._df = df

         return self

     def to_df(self):
-
-
+        columns_names = self.get_column_names()
+        df = self.get_raw_df()
+        rename_df_columns(df, columns_names)
+        return df

     def to_df_cols(self, prefix=''):
         # returns dataframe and dict of columns
@@ -128,7 +141,9 @@ class ResultSet:
             columns.append(name)
             col_names[name] = col

-
+        df = self.get_raw_df()
+        rename_df_columns(df, columns)
+        return df, col_names

     # --- tables ---

@@ -174,7 +189,7 @@ class ResultSet:
         self._columns.pop(idx)

         self._df.drop(idx, axis=1, inplace=True)
-
+        rename_df_columns(self._df)

     @property
     def columns(self):
@@ -226,7 +241,7 @@ class ResultSet:
         if len(df.columns) != len(self._columns):
             raise WrongArgumentError(f'Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}')

-
+        rename_df_columns(df)

         if self._df is None:
             self._df = df
@@ -269,7 +284,7 @@ class ResultSet:
     def get_column_values(self, col_idx):
         # get by column index
         df = self.get_raw_df()
-        return list(df[col_idx])
+        return list(df[df.columns[col_idx]])

     def set_column_values(self, col_name, values):
         # values is one value or list of values
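The net effect of the result_set.py changes above is that ResultSet now keeps only positional integer labels on its internal dataframe (rename_df_columns(df) with no names) and re-applies the real column names just-in-time in to_df()/to_df_cols(), so duplicate or renamed columns cannot collide inside the internal frame. A small illustration using the rename_df_columns helper exactly as added above; the sample dataframe is made up.

    from typing import List, Optional

    import pandas as pd


    def rename_df_columns(df: pd.DataFrame, names: Optional[List] = None) -> None:
        # Copied from the hunk above: reset to positional labels, or apply names.
        if names is not None:
            df.columns = names
        else:
            df.columns = list(range(len(df.columns)))


    df = pd.DataFrame([[1, 2]], columns=["price", "price"])  # duplicate names
    rename_df_columns(df)                      # internal form: columns become 0, 1
    assert list(df.columns) == [0, 1]          # positional access is unambiguous
    rename_df_columns(df, ["price", "price"])  # real names restored, as to_df() does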
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py
@@ -213,7 +213,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
         columns = list(table_df.columns)
         for col_idx, name in cols_to_rename.items():
             columns[col_idx] = name
-        table_df =
+        table_df.columns = columns

         version = None
         if len(step.predictor.parts) > 1 and step.predictor.parts[-1].isdigit():
mindsdb/api/executor/sql_query/steps/join_step.py
@@ -90,15 +90,15 @@ class JoinStepCall(BaseStepCall):
         table_b, names_b = right_data.to_df_cols(prefix='B')

         query = f"""
-
-
-
+            SELECT * FROM table_a {join_type} table_b
+            ON {join_condition}
+        """
         resp_df, _description = query_df_with_type_infer_fallback(query, {
             'table_a': table_a,
             'table_b': table_b
         })

-        resp_df
+        resp_df.replace({np.nan: None}, inplace=True)

         names_a.update(names_b)
         data = ResultSet().from_df_cols(resp_df, col_names=names_a)
mindsdb/api/executor/sql_query/steps/map_reduce_step.py
@@ -1,4 +1,3 @@
-import os
 import copy

 from mindsdb_sql_parser.ast import (
@@ -15,8 +14,7 @@ from mindsdb.api.executor.planner.steps import (

 from mindsdb.api.executor.sql_query.result_set import ResultSet
 from mindsdb.api.executor.exceptions import LogicError
-from mindsdb.utilities.
-from mindsdb.utilities.context_executor import execute_in_threads
+from mindsdb.utilities.partitioning import process_dataframe_in_partitions

 from .base import BaseStepCall

@@ -88,43 +86,12 @@ class MapReduceStepCall(BaseStepCall):

         df = input_data.get_raw_df()

-
-
-            chunk = 0
-            while chunk * partition < len(df):
-                # create results with partition
-                df1 = df.iloc[chunk * partition: (chunk + 1) * partition]
-                chunk += 1
-                yield df1, substeps, input_idx, input_columns
-
-        tasks = split_data_f(df)
-
-        # workers count
-        is_cloud = Config().get('cloud', False)
-        if is_cloud:
-            max_threads = int(os.getenv('MAX_QUERY_PARTITIONS', 10))
-        else:
-            max_threads = os.cpu_count() - 2
-
-        # don't exceed chunk_count
-        chunk_count = int(len(df) / partition)
-        max_threads = min(max_threads, chunk_count)
-
-        if max_threads < 1:
-            max_threads = 1
+        def callback(chunk):
+            return self._exec_partition(chunk, substeps, input_idx, input_columns)

-
-
-
-            for task in tasks:
-                sub_data = self._exec_partition(*task)
-                if sub_data:
-                    data = join_query_data(data, sub_data)
-
-        else:
-            for sub_data in execute_in_threads(self._exec_partition, tasks, thread_count=max_threads):
-                if sub_data:
-                    data = join_query_data(data, sub_data)
+        for result in process_dataframe_in_partitions(df, callback, partition):
+            if result:
+                data = join_query_data(data, result)

         return data

mindsdb/api/executor/utilities/sql.py
@@ -182,14 +182,6 @@ def query_df(df, query, session=None):
         df = df.astype({'CONNECTION_DATA': 'string'})

    result_df, description = query_df_with_type_infer_fallback(query_str, {'df': df}, user_functions=user_functions)
-    result_df
-
-    new_column_names = {}
-    real_column_names = [x[0] for x in description]
-    for i, duck_column_name in enumerate(result_df.columns):
-        new_column_names[duck_column_name] = real_column_names[i]
-    result_df = result_df.rename(
-        new_column_names,
-        axis='columns'
-    )
+    result_df.replace({np.nan: None}, inplace=True)
+    result_df.columns = [x[0] for x in description]
     return result_df
mindsdb/api/http/namespaces/agents.py
@@ -475,11 +475,13 @@ class AgentCompletions(Resource):

         output_col = agents_controller.assistant_column
         model_output = completion.iloc[-1][output_col]
+        trace_id = completion.iloc[-1]['trace_id']

         response = {
             'message': {
                 'content': model_output,
-                'role': 'assistant'
+                'role': 'assistant',
+                'trace_id': trace_id
             }
         }

mindsdb/api/http/namespaces/knowledge_bases.py
@@ -185,8 +185,10 @@ class KnowledgeBaseResource(Resource):
         )

         try:
+            kb_data = request.json['knowledge_base']
+
             # Retrieve the knowledge base table for updates
-            table = session.kb_controller.get_table(knowledge_base_name, project.id)
+            table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get('params'))
             if table is None:
                 return http_error(
                     HTTPStatus.NOT_FOUND,
@@ -194,8 +196,6 @@ class KnowledgeBaseResource(Resource):
                     f'Knowledge Base with name {knowledge_base_name} does not exist'
                 )

-            kb_data = request.json['knowledge_base']
-
             # Set up dependencies for DocumentLoader
             file_controller = FileController()
             file_splitter_config = FileSplitterConfig()
mindsdb/api/http/namespaces/sql.py
@@ -78,6 +78,7 @@ class Query(Resource):
                 "error_code": 0,
                 "error_message": str(e),
             }
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")

         except UnknownError as e:
             # unclassified
@@ -87,6 +88,7 @@ class Query(Resource):
                 "error_code": 0,
                 "error_message": str(e),
             }
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")

         except Exception as e:
             error_type = "unexpected"
@@ -95,7 +97,7 @@ class Query(Resource):
                 "error_code": 0,
                 "error_message": str(e),
             }
-            logger.
+            logger.error(f"Error query processing: \n{traceback.format_exc()}")

         if query_response.get("type") == SQL_RESPONSE_TYPE.ERROR:
             error_type = "expected"
mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py
@@ -2,7 +2,8 @@ from mindsdb_sql_parser import parse_sql
 from mindsdb.api.executor.planner import utils as planner_utils

 import mindsdb.utilities.profiler as profiler
-from mindsdb.api.executor import Column
+from mindsdb.api.executor.sql_query.result_set import Column
+from mindsdb.api.executor.sql_query import SQLQuery
 from mindsdb.api.executor.command_executor import ExecuteCommands
 from mindsdb.api.mysql.mysql_proxy.utilities import ErSqlSyntaxError
 from mindsdb.utilities import log
mindsdb/api/mysql/mysql_proxy/mysql_proxy.py
@@ -83,6 +83,7 @@ from mindsdb.api.mysql.mysql_proxy.utilities.lightwood_dtype import dtype
 from mindsdb.utilities import log
 from mindsdb.utilities.config import Config
 from mindsdb.utilities.context import context as ctx
+from mindsdb.utilities.otel.metric_handlers import get_query_request_counter
 from mindsdb.utilities.wizards import make_ssl_cert

 logger = log.getLogger(__name__)
@@ -562,6 +563,12 @@ class MysqlProxy(SocketServer.BaseRequestHandler):
             data=executor.data,
             status=executor.server_status,
         )
+
+        # Increment the counter and include metadata in attributes
+        metadata = ctx.metadata(query=sql)
+        query_request_counter = get_query_request_counter()
+        query_request_counter.add(1, metadata)
+
         return resp

     def answer_stmt_prepare(self, sql):
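The counter used here comes from the new mindsdb/utilities/otel/metric_handlers package (+25 lines), which is not shown in this diff. A rough sketch of what such a handler could look like on top of the standard opentelemetry-api, purely as an illustration; the meter name, metric name, and description below are assumptions, not taken from the package.

    from opentelemetry import metrics

    _query_request_counter = None


    def get_query_request_counter():
        # Lazily create a Counter; callers then do counter.add(1, attributes),
        # as in the mysql_proxy.py hunk above.
        global _query_request_counter
        if _query_request_counter is None:
            meter = metrics.get_meter("mindsdb")  # meter name is an assumption
            _query_request_counter = meter.create_counter(
                "mindsdb.query.requests",  # metric name is an assumption
                description="SQL query requests handled by the MySQL proxy",
            )
        return _query_request_counter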
mindsdb/api/postgres/postgres_proxy/executor/executor.py
@@ -6,7 +6,8 @@ from mindsdb.api.executor.planner import utils as planner_utils
 from numpy import dtype as np_dtype
 from pandas.api import types as pd_types

-from mindsdb.api.executor import SQLQuery
+from mindsdb.api.executor.sql_query import SQLQuery
+from mindsdb.api.executor.sql_query.result_set import Column
 from mindsdb.api.mysql.mysql_proxy.utilities.lightwood_dtype import dtype
 from mindsdb.api.executor.command_executor import ExecuteCommands
 from mindsdb.api.mysql.mysql_proxy.utilities import SqlApiException
mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py
@@ -286,7 +286,7 @@ class ChromaDBHandler(VectorStoreHandler):
         else:
             # general get query
             result = collection.get(
-                ids=id_filters,
+                ids=id_filters or None,
                 where=filters,
                 limit=limit,
                 offset=offset,
@@ -475,7 +475,7 @@ class ChromaDBHandler(VectorStoreHandler):
         collections = self._client.list_collections()
         collections_name = pd.DataFrame(
             columns=["table_name"],
-            data=
+            data=collections,
         )
         return Response(resp_type=RESPONSE_TYPE.TABLE, data_frame=collections_name)

mindsdb/integrations/handlers/chromadb_handler/requirements.txt
@@ -1 +1 @@
-chromadb~=0.
+chromadb~=0.6.3
mindsdb/integrations/handlers/databricks_handler/requirements.txt
@@ -1 +1 @@
-databricks-sql-connector
+databricks-sql-connector >= 3.7.1, < 4.0.0
mindsdb/integrations/handlers/file_handler/file_handler.py
@@ -276,7 +276,7 @@ class FileHandler(DatabaseHandler):

         header = df.columns.values.tolist()

-        df
+        df.columns = [key.strip() for key in header]
         df = df.applymap(clean_cell)

         header = [x.strip() for x in header]
mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py
@@ -25,6 +25,11 @@ test_file_content = [
     [3, -3, 0.3, "C"],
 ]

+test_excel_sheet_content = [
+    ["Sheet_Name"],
+    ["Sheet1"],
+]
+
 file_records = [("one", 1, test_file_content[0]), ("two", 2, test_file_content[0])]


@@ -349,7 +354,18 @@ def test_get_file_path_with_url(mock_fetch_url):
     ],
 )
 def test_handle_source(file_path, expected_columns):
-
+    sheet_name = None
+    # Excel files return a list of sheets when queried without a sheet name
+    if file_path.endswith(".xlsx"):
+        df, _ = FileHandler._handle_source(file_path)
+        assert isinstance(df, pandas.DataFrame)
+
+        assert df.columns.tolist() == test_excel_sheet_content[0]
+        assert len(df) == len(test_excel_sheet_content) - 1
+        assert df.values.tolist() == test_excel_sheet_content[1:]
+        sheet_name = test_excel_sheet_content[1][0]
+
+    df, _ = FileHandler._handle_source(file_path, sheet_name=sheet_name)
     assert isinstance(df, pandas.DataFrame)
     assert df.columns.tolist() == expected_columns

mindsdb/integrations/handlers/jira_handler/jira_handler.py
@@ -55,9 +55,23 @@ class JiraHandler(APIHandler):
             return self.connection

         s = requests.Session()
+        if self.connection_data.get("cloud", False):
+            params = {
+                "cloud": True,
+                "username": self.connection_data['jira_username'],
+                "password": self.connection_data['jira_api_token'],
+                "url": self.connection_data['jira_url'],
+            }
+        else:
+            params = {
+                "cloud": False,
+                "url": self.connection_data['jira_url'],
+                "session": s
+            }
+
         s.headers['Authorization'] = f"Bearer {self.connection_data['jira_api_token']}"

-        self.connection = Jira(
+        self.connection = Jira(**params)
         self.is_connected = True

mindsdb/integrations/handlers/jira_handler/jira_table.py
@@ -10,9 +10,26 @@ from mindsdb_sql_parser import ast

 logger = log.getLogger(__name__)

+
+def flatten_json(nested_json, parent_key="", separator="."):
+    """
+    Recursively flattens a nested JSON object into a dictionary with dot notation keys.
+    """
+    items = []
+    for k, v in nested_json.items():
+        new_key = f"{parent_key}{separator}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.extend(flatten_json(v, new_key, separator=separator).items())
+        else:
+            items.append((new_key, v))
+    return dict(items)
+
+
 class JiraProjectsTable(APITable):
     """Jira Projects Table implementation"""
+
     _MAX_API_RESULTS = 100
+
     def select(self, query: ast.Select) -> pd.DataFrame:
         """Pulls data from the Jira "get_all_project_issues" API endpoint
         Parameters
@@ -42,8 +59,8 @@ class JiraProjectsTable(APITable):

         for an_order in query.order_by:
             if an_order.field.parts[0] != "key":
-                continue
-            if an_order.field.parts[1] in ["reporter","assignee","status"]:
+                continue
+            if an_order.field.parts[1] in ["reporter", "assignee", "status"]:
                 if issues_kwargs != {}:
                     raise ValueError(
                         "Duplicate order conditions found for reporter,status and assignee"
@@ -61,9 +78,9 @@ class JiraProjectsTable(APITable):
                     raise ValueError(
                         f"Order by unknown column {an_order.field.parts[1]}"
                     )
-        project = self.handler.connection_data[
+        project = self.handler.connection_data["project"]
         jira_project_df = self.call_jira_api(project)
-
+
         selected_columns = []
         for target in query.targets:
             if isinstance(target, ast.Star):
@@ -74,7 +91,6 @@ class JiraProjectsTable(APITable):
             else:
                 raise ValueError(f"Unknown query target {type(target)}")

-
         if len(jira_project_df) == 0:
             jira_project_df = pd.DataFrame([], columns=selected_columns)
         return jira_project_df
@@ -88,7 +104,7 @@ class JiraProjectsTable(APITable):
                 by=order_by_conditions["columns"],
                 ascending=order_by_conditions["ascending"],
             )
-
+
         if query.limit:
             jira_project_df = jira_project_df.head(total_results)

@@ -102,12 +118,12 @@ class JiraProjectsTable(APITable):
             List of columns
         """
         return [
-
-
-
-
-
-
+            "key",
+            "summary",
+            "status",
+            "reporter",
+            "assignee",
+            "priority",
         ]

     def call_jira_api(self, project):
@@ -116,36 +132,41 @@ class JiraProjectsTable(APITable):
         max_records = jira.get_project_issues_count(project)
         max_records = 100
         jql_query = self.handler.construct_jql()
-        max_results = self._MAX_API_RESULTS
+        max_results = self._MAX_API_RESULTS
         start_index = 0
         total = 1
         fields = [
-
-
-
-
-
-
+            "key",
+            "fields.summary",
+            "fields.status.name",
+            "fields.reporter.displayName",
+            "fields.assignee.displayName",
+            "fields.priority.name",
         ]

         all_jira_issues_df = pd.DataFrame(columns=fields)

         while start_index <= total:
-            results = self.handler.connect().jql(
-
+            results = self.handler.connect().jql(
+                jql_query, start=start_index, limit=max_results
+            )
+            flattened_data = [flatten_json(item) for item in results["issues"]]
+            df = pd.DataFrame(flattened_data)
             df = df[fields]
             start_index += max_results
-            total =
+            total = results["total"]
             all_jira_issues_df = pd.concat([all_jira_issues_df, df], axis=0)

+        all_jira_issues_df = all_jira_issues_df.rename(
+            columns={
+                "key": "key",
+                "fields.summary": "summary",
+                "fields.reporter.displayName": "reporter",
+                "fields.assignee.displayName": "assignee",
+                "fields.priority.name": "priority",
+                "fields.status.name": "status",
+            },
+            errors="ignore",
+        )

-        all_jira_issues_df = all_jira_issues_df.rename(columns={
-            'key': 'key',
-            'fields.summary': 'summary',
-            'fields.reporter.name':'reporter',
-            'fields.assignee.name':'assignee',
-            'fields.priority.name':'priority',
-            'fields.status.name':'status'})
-
         return all_jira_issues_df
-
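For reference, the flatten_json helper introduced in jira_table.py is what produces the dot-notation keys ("fields.status.name", "fields.assignee.displayName", ...) that the fields list and the rename() mapping above rely on. A quick illustration with a made-up issue payload, using the function exactly as defined in the hunk above.

    def flatten_json(nested_json, parent_key="", separator="."):
        # Same logic as added in jira_table.py: recursively flatten nested
        # dicts into a single dict with dot-notation keys.
        items = []
        for k, v in nested_json.items():
            new_key = f"{parent_key}{separator}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(flatten_json(v, new_key, separator=separator).items())
            else:
                items.append((new_key, v))
        return dict(items)


    issue = {  # made-up example payload, not from the Jira API
        "key": "PROJ-1",
        "fields": {
            "summary": "Fix login redirect",
            "status": {"name": "In Progress"},
            "assignee": {"displayName": "Ada Lovelace"},
        },
    }
    print(flatten_json(issue))
    # {'key': 'PROJ-1', 'fields.summary': 'Fix login redirect',
    #  'fields.status.name': 'In Progress',
    #  'fields.assignee.displayName': 'Ada Lovelace'}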