MindsDB 25.2.3.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +16 -11
- mindsdb/api/executor/command_executor.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -2
- mindsdb/api/executor/planner/query_planner.py +6 -2
- mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -1
- mindsdb/api/http/initialize.py +8 -5
- mindsdb/api/http/namespaces/agents.py +0 -7
- mindsdb/api/http/namespaces/config.py +0 -48
- mindsdb/api/http/namespaces/knowledge_bases.py +1 -1
- mindsdb/api/http/namespaces/util.py +0 -28
- mindsdb/api/mongo/classes/query_sql.py +2 -1
- mindsdb/api/mongo/responders/aggregate.py +2 -2
- mindsdb/api/mongo/responders/coll_stats.py +3 -2
- mindsdb/api/mongo/responders/db_stats.py +2 -1
- mindsdb/api/mongo/responders/insert.py +4 -2
- mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +5 -4
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +2 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
- mindsdb/integrations/handlers/dspy_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/gmail_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +19 -66
- mindsdb/integrations/handlers/gmail_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/google_calendar_handler/connection_args.py +15 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +31 -41
- mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/langchain_embedding_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +3 -1
- mindsdb/integrations/handlers/openai_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/rag_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +33 -8
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +3 -2
- mindsdb/integrations/handlers/web_handler/web_handler.py +42 -33
- mindsdb/integrations/handlers/youtube_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/youtube_handler/connection_args.py +32 -0
- mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +2 -38
- mindsdb/integrations/libs/llm/utils.py +7 -1
- mindsdb/integrations/libs/process_cache.py +2 -2
- mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +29 -38
- mindsdb/integrations/utilities/pydantic_utils.py +208 -0
- mindsdb/integrations/utilities/rag/chains/local_context_summarizer_chain.py +227 -0
- mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +800 -135
- mindsdb/integrations/utilities/rag/settings.py +390 -152
- mindsdb/integrations/utilities/sql_utils.py +2 -1
- mindsdb/interfaces/agents/agents_controller.py +14 -10
- mindsdb/interfaces/agents/callback_handlers.py +52 -5
- mindsdb/interfaces/agents/langchain_agent.py +5 -3
- mindsdb/interfaces/agents/mindsdb_chat_model.py +4 -2
- mindsdb/interfaces/chatbot/chatbot_controller.py +9 -8
- mindsdb/interfaces/database/database.py +3 -2
- mindsdb/interfaces/database/integrations.py +1 -1
- mindsdb/interfaces/database/projects.py +28 -2
- mindsdb/interfaces/jobs/jobs_controller.py +4 -1
- mindsdb/interfaces/jobs/scheduler.py +1 -1
- mindsdb/interfaces/knowledge_base/preprocessing/constants.py +2 -2
- mindsdb/interfaces/model/model_controller.py +5 -2
- mindsdb/interfaces/skills/retrieval_tool.py +128 -39
- mindsdb/interfaces/skills/skill_tool.py +7 -7
- mindsdb/interfaces/skills/skills_controller.py +10 -6
- mindsdb/interfaces/skills/sql_agent.py +6 -1
- mindsdb/interfaces/storage/db.py +14 -12
- mindsdb/interfaces/storage/json.py +59 -0
- mindsdb/interfaces/storage/model_fs.py +85 -3
- mindsdb/interfaces/triggers/triggers_controller.py +2 -1
- mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +17 -3
- mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py +33 -0
- mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +29 -0
- mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +41 -0
- mindsdb/utilities/config.py +6 -1
- mindsdb/utilities/functions.py +11 -0
- mindsdb/utilities/log.py +17 -2
- mindsdb/utilities/ml_task_queue/consumer.py +4 -2
- mindsdb/utilities/render/sqlalchemy_render.py +4 -0
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/METADATA +226 -247
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/RECORD +83 -80
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/WHEEL +1 -1
- mindsdb/integrations/handlers/gmail_handler/utils.py +0 -45
- mindsdb/utilities/log_controller.py +0 -39
- mindsdb/utilities/telemetry.py +0 -44
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/top_level.txt +0 -0
|
@@ -1,33 +1,24 @@
|
|
|
1
|
+
import traceback
|
|
2
|
+
|
|
1
3
|
from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
|
|
2
4
|
from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
|
|
5
|
+
from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
|
|
6
|
+
from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
|
|
3
7
|
|
|
4
8
|
from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
|
|
5
9
|
from mindsdb.interfaces.skills.skill_tool import skill_tool
|
|
6
10
|
from mindsdb.interfaces.storage import db
|
|
7
11
|
from mindsdb.interfaces.storage.db import KnowledgeBase
|
|
8
12
|
from mindsdb.utilities import log
|
|
13
|
+
from langchain_core.documents import Document
|
|
9
14
|
from langchain_core.tools import Tool
|
|
15
|
+
from mindsdb.integrations.libs.response import RESPONSE_TYPE
|
|
10
16
|
from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
|
|
11
17
|
|
|
12
18
|
logger = log.getLogger(__name__)
|
|
13
19
|
|
|
14
20
|
|
|
15
|
-
def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
16
|
-
"""
|
|
17
|
-
Builds a retrieval tool i.e RAG
|
|
18
|
-
|
|
19
|
-
Args:
|
|
20
|
-
tool: Tool configuration dictionary
|
|
21
|
-
pred_args: Predictor arguments dictionary
|
|
22
|
-
skill: Skills database object
|
|
23
|
-
|
|
24
|
-
Returns:
|
|
25
|
-
Tool: Configured retrieval tool
|
|
26
|
-
|
|
27
|
-
Raises:
|
|
28
|
-
ValueError: If knowledge base is not found or configuration is invalid
|
|
29
|
-
"""
|
|
30
|
-
# build RAG config
|
|
21
|
+
def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
|
|
31
22
|
tools_config = tool['config']
|
|
32
23
|
tools_config.update(pred_args)
|
|
33
24
|
|
|
@@ -71,34 +62,132 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
|
71
62
|
logger.debug("Using default embedding model as no knowledge base provided")
|
|
72
63
|
|
|
73
64
|
# Load and validate config
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
65
|
+
return load_rag_config(tools_config, kb_params, embeddings_model)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
69
|
+
rag_config = _load_rag_config(tool, pred_args, skill)
|
|
70
|
+
# build retriever
|
|
71
|
+
rag_pipeline = RAG(rag_config)
|
|
72
|
+
logger.debug(f"RAG pipeline created with config: {rag_config}")
|
|
73
|
+
|
|
74
|
+
def rag_wrapper(query: str) -> str:
|
|
75
|
+
try:
|
|
76
|
+
result = rag_pipeline(query)
|
|
77
|
+
logger.debug(f"RAG pipeline result: {result}")
|
|
78
|
+
return result['answer']
|
|
79
|
+
except Exception as e:
|
|
80
|
+
logger.error(f"Error in RAG pipeline: {str(e)}")
|
|
81
|
+
logger.error(traceback.format_exc())
|
|
82
|
+
return f"Error in retrieval: {str(e)}"
|
|
83
|
+
|
|
84
|
+
# Create RAG tool
|
|
85
|
+
tools_config = tool['config']
|
|
86
|
+
tools_config.update(pred_args)
|
|
87
|
+
return Tool(
|
|
88
|
+
func=rag_wrapper,
|
|
89
|
+
name=tool['name'],
|
|
90
|
+
description=tool['description'],
|
|
91
|
+
response_format='content',
|
|
92
|
+
# Return directly by default since we already use an LLM against retrieved context to generate a response.
|
|
93
|
+
return_direct=tools_config.get('return_direct', True)
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
98
|
+
if 'source' not in tool:
|
|
99
|
+
raise ValueError("Knowledge base for tool not found")
|
|
100
|
+
kb_name = tool['source']
|
|
101
|
+
executor = skill_tool.get_command_executor()
|
|
102
|
+
kb = _get_knowledge_base(kb_name, skill.project_id, executor)
|
|
103
|
+
if not kb:
|
|
104
|
+
raise ValueError(f"Knowledge base not found: {kb_name}")
|
|
105
|
+
kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
|
|
106
|
+
vector_db_handler = kb_table.get_vector_db()
|
|
107
|
+
|
|
108
|
+
rag_config = _load_rag_config(tool, pred_args, skill)
|
|
109
|
+
metadata_config = rag_config.metadata_config
|
|
110
|
+
|
|
111
|
+
def _get_document_by_name(name: str):
|
|
112
|
+
if metadata_config.name_column_index is not None:
|
|
113
|
+
tsquery_str = ' & '.join(name.split(' '))
|
|
114
|
+
documents_response = vector_db_handler.native_query(
|
|
115
|
+
f'SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery(\'{tsquery_str}\') LIMIT 1;'
|
|
116
|
+
)
|
|
117
|
+
else:
|
|
118
|
+
documents_response = vector_db_handler.native_query(
|
|
119
|
+
f'SELECT * FROM {metadata_config.table} WHERE "{metadata_config.name_column}" ILIKE \'%{name}%\' LIMIT 1;'
|
|
120
|
+
)
|
|
121
|
+
if documents_response.resp_type == RESPONSE_TYPE.ERROR:
|
|
122
|
+
raise RuntimeError(f'There was an error looking up documents: {documents_response.error_message}')
|
|
123
|
+
if documents_response.data_frame.empty:
|
|
124
|
+
return None
|
|
125
|
+
document_row = documents_response.data_frame.head(1)
|
|
126
|
+
# Restore document from chunks, keeping in mind max context.
|
|
127
|
+
id_filter_condition = FilterCondition(
|
|
128
|
+
f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
|
|
129
|
+
FilterOperator.EQUAL,
|
|
130
|
+
str(document_row.get(metadata_config.id_column).item())
|
|
131
|
+
)
|
|
132
|
+
document_chunks_df = vector_db_handler.select(
|
|
133
|
+
metadata_config.embeddings_table,
|
|
134
|
+
conditions=[id_filter_condition]
|
|
97
135
|
)
|
|
136
|
+
if document_chunks_df.empty:
|
|
137
|
+
return None
|
|
138
|
+
sort_col = 'chunk_id' if 'chunk_id' in document_chunks_df.columns else 'id'
|
|
139
|
+
document_chunks_df.sort_values(by=sort_col)
|
|
140
|
+
content = ''
|
|
141
|
+
for _, chunk in document_chunks_df.iterrows():
|
|
142
|
+
if len(content) > metadata_config.max_document_context:
|
|
143
|
+
break
|
|
144
|
+
content += chunk.get(metadata_config.content_column, '')
|
|
145
|
+
|
|
146
|
+
return Document(
|
|
147
|
+
page_content=content,
|
|
148
|
+
metadata=document_row.to_dict(orient='records')[0]
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
def _lookup_document_by_name(name: str):
|
|
152
|
+
found_document = _get_document_by_name(name)
|
|
153
|
+
if found_document is None:
|
|
154
|
+
return f'I could not find any document with name {name}. Please make sure the document name matches exactly.'
|
|
155
|
+
return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"
|
|
156
|
+
|
|
157
|
+
return Tool(
|
|
158
|
+
func=_lookup_document_by_name,
|
|
159
|
+
name=tool.get('name', '') + '_name_lookup',
|
|
160
|
+
description='You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.',
|
|
161
|
+
return_direct=False
|
|
162
|
+
)
|
|
98
163
|
|
|
164
|
+
|
|
165
|
+
def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills):
|
|
166
|
+
"""
|
|
167
|
+
Builds a list of tools for retrieval i.e RAG
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
tool: Tool configuration dictionary
|
|
171
|
+
pred_args: Predictor arguments dictionary
|
|
172
|
+
skill: Skills database object
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
Tool: Configured list of retrieval tools
|
|
176
|
+
|
|
177
|
+
Raises:
|
|
178
|
+
ValueError: If knowledge base is not found or configuration is invalid
|
|
179
|
+
"""
|
|
180
|
+
# Catch configuration errors before creating tools.
|
|
181
|
+
try:
|
|
182
|
+
rag_config = _load_rag_config(tool, pred_args, skill)
|
|
99
183
|
except Exception as e:
|
|
100
184
|
logger.error(f"Error building RAG pipeline: {str(e)}")
|
|
101
185
|
raise ValueError(f"Failed to build RAG pipeline: {str(e)}")
|
|
186
|
+
tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
|
|
187
|
+
if rag_config.metadata_config is None:
|
|
188
|
+
return tools
|
|
189
|
+
tools.append(_build_name_lookup_tool(tool, pred_args, skill))
|
|
190
|
+
return tools
|
|
102
191
|
|
|
103
192
|
|
|
104
193
|
def _get_knowledge_base(knowledge_base_name: str, project_id, executor) -> KnowledgeBase:
|
|
@@ -10,6 +10,7 @@ from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant
|
|
|
10
10
|
|
|
11
11
|
from mindsdb.utilities import log
|
|
12
12
|
from mindsdb.utilities.cache import get_cache
|
|
13
|
+
from mindsdb.utilities.config import config
|
|
13
14
|
from mindsdb.interfaces.storage import db
|
|
14
15
|
from mindsdb.interfaces.skills.sql_agent import SQLAgent
|
|
15
16
|
from mindsdb.integrations.libs.vectordatabase_handler import TableField
|
|
@@ -106,7 +107,7 @@ class SkillToolController:
|
|
|
106
107
|
from mindsdb.api.executor.controllers import SessionController # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304) # noqa
|
|
107
108
|
|
|
108
109
|
sql_session = SessionController()
|
|
109
|
-
sql_session.database = 'mindsdb'
|
|
110
|
+
sql_session.database = config.get('default_project')
|
|
110
111
|
|
|
111
112
|
self.command_executor = ExecuteCommands(sql_session)
|
|
112
113
|
return self.command_executor
|
|
@@ -222,8 +223,8 @@ class SkillToolController:
|
|
|
222
223
|
pred_args = {}
|
|
223
224
|
pred_args['llm'] = llm
|
|
224
225
|
|
|
225
|
-
from .retrieval_tool import build_retrieval_tool
|
|
226
|
-
return build_retrieval_tool(tool, pred_args, skill)
|
|
226
|
+
from .retrieval_tool import build_retrieval_tools
|
|
227
|
+
return build_retrieval_tools(tool, pred_args, skill)
|
|
227
228
|
|
|
228
229
|
def _get_rag_query_function(self, skill: db.Skills):
|
|
229
230
|
session_controller = self.get_command_executor().session
|
|
@@ -295,10 +296,9 @@ class SkillToolController:
|
|
|
295
296
|
for skill in skills
|
|
296
297
|
]
|
|
297
298
|
elif skill_type == SkillType.RETRIEVAL:
|
|
298
|
-
tools[skill_type] = [
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
]
|
|
299
|
+
tools[skill_type] = []
|
|
300
|
+
for skill in skills:
|
|
301
|
+
tools[skill_type] += self._make_retrieval_tools(skill, llm, embedding_model)
|
|
302
302
|
return tools
|
|
303
303
|
|
|
304
304
|
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
from typing import Dict, List, Optional
|
|
3
3
|
|
|
4
|
-
from sqlalchemy import null
|
|
4
|
+
from sqlalchemy import null, func
|
|
5
5
|
from sqlalchemy.orm.attributes import flag_modified
|
|
6
6
|
|
|
7
7
|
from mindsdb.interfaces.storage import db
|
|
8
8
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
9
|
+
from mindsdb.utilities.config import config
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
default_project = config.get('default_project')
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class SkillsController:
|
|
@@ -16,7 +20,7 @@ class SkillsController:
|
|
|
16
20
|
project_controller = ProjectController()
|
|
17
21
|
self.project_controller = project_controller
|
|
18
22
|
|
|
19
|
-
def get_skill(self, skill_name: str, project_name: str =
|
|
23
|
+
def get_skill(self, skill_name: str, project_name: str = default_project) -> Optional[db.Skills]:
|
|
20
24
|
'''
|
|
21
25
|
Gets a skill by name. Skills are expected to have unique names.
|
|
22
26
|
|
|
@@ -33,7 +37,7 @@ class SkillsController:
|
|
|
33
37
|
|
|
34
38
|
project = self.project_controller.get(name=project_name)
|
|
35
39
|
return db.Skills.query.filter(
|
|
36
|
-
db.Skills.name == skill_name,
|
|
40
|
+
func.lower(db.Skills.name) == func.lower(skill_name),
|
|
37
41
|
db.Skills.project_id == project.id,
|
|
38
42
|
db.Skills.deleted_at == null()
|
|
39
43
|
).first()
|
|
@@ -90,7 +94,7 @@ class SkillsController:
|
|
|
90
94
|
ValueError: If `project_name` does not exist or skill already exists
|
|
91
95
|
'''
|
|
92
96
|
if project_name is None:
|
|
93
|
-
project_name =
|
|
97
|
+
project_name = default_project
|
|
94
98
|
project = self.project_controller.get(name=project_name)
|
|
95
99
|
|
|
96
100
|
skill = self.get_skill(name, project_name)
|
|
@@ -113,7 +117,7 @@ class SkillsController:
|
|
|
113
117
|
self,
|
|
114
118
|
skill_name: str,
|
|
115
119
|
new_name: str = None,
|
|
116
|
-
project_name: str =
|
|
120
|
+
project_name: str = default_project,
|
|
117
121
|
type: str = None,
|
|
118
122
|
params: Dict[str, str] = None):
|
|
119
123
|
'''
|
|
@@ -158,7 +162,7 @@ class SkillsController:
|
|
|
158
162
|
|
|
159
163
|
return existing_skill
|
|
160
164
|
|
|
161
|
-
def delete_skill(self, skill_name: str, project_name: str =
|
|
165
|
+
def delete_skill(self, skill_name: str, project_name: str = default_project):
|
|
162
166
|
'''
|
|
163
167
|
Deletes a skill by name.
|
|
164
168
|
|
|
@@ -287,6 +287,7 @@ class SQLAgent:
|
|
|
287
287
|
return info
|
|
288
288
|
|
|
289
289
|
def _get_sample_rows(self, table: str, fields: List[str]) -> str:
|
|
290
|
+
logger.info(f'_get_sample_rows: table={table} fields={fields}')
|
|
290
291
|
command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
|
|
291
292
|
try:
|
|
292
293
|
ret = self._call_engine(command)
|
|
@@ -300,7 +301,7 @@ class SQLAgent:
|
|
|
300
301
|
map(lambda row: [truncate_value(value) for value in row], sample_rows))
|
|
301
302
|
sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
|
|
302
303
|
except Exception as e:
|
|
303
|
-
logger.
|
|
304
|
+
logger.info(f'_get_sample_rows error: {e}')
|
|
304
305
|
sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
|
|
305
306
|
|
|
306
307
|
return sample_rows_str
|
|
@@ -347,14 +348,18 @@ class SQLAgent:
|
|
|
347
348
|
|
|
348
349
|
def get_table_info_safe(self, table_names: Optional[List[str]] = None) -> str:
|
|
349
350
|
try:
|
|
351
|
+
logger.info(f'get_table_info_safe: {table_names}')
|
|
350
352
|
return self.get_table_info(table_names)
|
|
351
353
|
except Exception as e:
|
|
354
|
+
logger.info(f'get_table_info_safe error: {e}')
|
|
352
355
|
return f"Error: {e}"
|
|
353
356
|
|
|
354
357
|
def query_safe(self, command: str, fetch: str = "all") -> str:
|
|
355
358
|
try:
|
|
359
|
+
logger.info(f'query_safe (fetch={fetch}): {command}')
|
|
356
360
|
return self.query(command, fetch)
|
|
357
361
|
except Exception as e:
|
|
362
|
+
logger.info(f'query_safe error: {e}')
|
|
358
363
|
msg = f"Error: {e}"
|
|
359
364
|
if 'does not exist' in msg and ' relation ' in msg:
|
|
360
365
|
msg += '\nAvailable tables: ' + ', '.join(self.get_usable_table_names())
|
mindsdb/interfaces/storage/db.py
CHANGED
|
@@ -10,6 +10,7 @@ from sqlalchemy import (
|
|
|
10
10
|
DateTime,
|
|
11
11
|
Index,
|
|
12
12
|
Integer,
|
|
13
|
+
LargeBinary,
|
|
13
14
|
Numeric,
|
|
14
15
|
String,
|
|
15
16
|
UniqueConstraint,
|
|
@@ -213,23 +214,12 @@ class Project(Base):
|
|
|
213
214
|
deleted_at = Column(DateTime)
|
|
214
215
|
name = Column(String, nullable=False)
|
|
215
216
|
company_id = Column(Integer, default=0)
|
|
217
|
+
metadata_: dict = Column("metadata", JSON, nullable=True)
|
|
216
218
|
__table_args__ = (
|
|
217
219
|
UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
|
|
218
220
|
)
|
|
219
221
|
|
|
220
222
|
|
|
221
|
-
class Log(Base):
|
|
222
|
-
__tablename__ = "log"
|
|
223
|
-
|
|
224
|
-
id = Column(Integer, primary_key=True)
|
|
225
|
-
created_at = Column(DateTime, default=datetime.datetime.now)
|
|
226
|
-
log_type = Column(String) # log, info, warning, traceback etc
|
|
227
|
-
source = Column(String) # file + line
|
|
228
|
-
company_id = Column(Integer)
|
|
229
|
-
payload = Column(String)
|
|
230
|
-
created_at_index = Index("some_index", "created_at_index")
|
|
231
|
-
|
|
232
|
-
|
|
233
223
|
class Integration(Base):
|
|
234
224
|
__tablename__ = "integration"
|
|
235
225
|
id = Column(Integer, primary_key=True)
|
|
@@ -288,8 +278,20 @@ class JsonStorage(Base):
|
|
|
288
278
|
resource_id = Column(Integer)
|
|
289
279
|
name = Column(String)
|
|
290
280
|
content = Column(JSON)
|
|
281
|
+
encrypted_content = Column(LargeBinary, nullable=True)
|
|
291
282
|
company_id = Column(Integer)
|
|
292
283
|
|
|
284
|
+
def to_dict(self) -> Dict:
|
|
285
|
+
return {
|
|
286
|
+
"id": self.id,
|
|
287
|
+
"resource_group": self.resource_group,
|
|
288
|
+
"resource_id": self.resource_id,
|
|
289
|
+
"name": self.name,
|
|
290
|
+
"content": self.content,
|
|
291
|
+
"encrypted_content": self.encrypted_content,
|
|
292
|
+
"company_id": self.company_id,
|
|
293
|
+
}
|
|
294
|
+
|
|
293
295
|
|
|
294
296
|
class Jobs(Base):
|
|
295
297
|
__tablename__ = "jobs"
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from mindsdb.utilities.functions import decrypt_json, encrypt_json
|
|
2
|
+
from mindsdb.utilities.config import config
|
|
1
3
|
from mindsdb.interfaces.storage import db
|
|
2
4
|
from mindsdb.interfaces.storage.fs import RESOURCE_GROUP
|
|
3
5
|
from mindsdb.utilities.context import context as ctx
|
|
@@ -90,8 +92,65 @@ class JsonStorage:
|
|
|
90
92
|
logger.error('cant delete records from JSON storage')
|
|
91
93
|
|
|
92
94
|
|
|
95
|
+
class EncryptedJsonStorage(JsonStorage):
|
|
96
|
+
def __init__(self, resource_group: str, resource_id: int):
|
|
97
|
+
super().__init__(resource_group, resource_id)
|
|
98
|
+
self.secret_key = config.get('secret_key', 'dummy-key')
|
|
99
|
+
|
|
100
|
+
def __setitem__(self, key: str, value: dict) -> None:
|
|
101
|
+
if isinstance(value, dict) is False:
|
|
102
|
+
raise TypeError(f"got {type(value)} instead of dict")
|
|
103
|
+
|
|
104
|
+
encrypted_value = encrypt_json(value, self.secret_key)
|
|
105
|
+
|
|
106
|
+
existing_record = self.get_record(key)
|
|
107
|
+
if existing_record is None:
|
|
108
|
+
record = db.JsonStorage(
|
|
109
|
+
name=key,
|
|
110
|
+
resource_group=self.resource_group,
|
|
111
|
+
resource_id=self.resource_id,
|
|
112
|
+
company_id=ctx.company_id,
|
|
113
|
+
encrypted_content=encrypted_value
|
|
114
|
+
)
|
|
115
|
+
db.session.add(record)
|
|
116
|
+
else:
|
|
117
|
+
existing_record.encrypted_content = encrypted_value
|
|
118
|
+
db.session.commit()
|
|
119
|
+
|
|
120
|
+
def set_bytes(self, key: str, encrypted_value: bytes):
|
|
121
|
+
existing_record = self.get_record(key)
|
|
122
|
+
if existing_record is None:
|
|
123
|
+
record = db.JsonStorage(
|
|
124
|
+
name=key,
|
|
125
|
+
resource_group=self.resource_group,
|
|
126
|
+
resource_id=self.resource_id,
|
|
127
|
+
company_id=ctx.company_id,
|
|
128
|
+
encrypted_content=encrypted_value
|
|
129
|
+
)
|
|
130
|
+
db.session.add(record)
|
|
131
|
+
else:
|
|
132
|
+
existing_record.encrypted_content = encrypted_value
|
|
133
|
+
db.session.commit()
|
|
134
|
+
|
|
135
|
+
def set_str(self, key: str, encrypted_value: str):
|
|
136
|
+
self.set_bytes(key, encrypted_value.encode())
|
|
137
|
+
|
|
138
|
+
def __getitem__(self, key: str) -> dict:
|
|
139
|
+
record = self.get_record(key)
|
|
140
|
+
if record is None:
|
|
141
|
+
return None
|
|
142
|
+
return decrypt_json(record.encrypted_content, self.secret_key)
|
|
143
|
+
|
|
144
|
+
|
|
93
145
|
def get_json_storage(resource_id: int, resource_group: str = RESOURCE_GROUP.PREDICTOR):
|
|
94
146
|
return JsonStorage(
|
|
95
147
|
resource_group=resource_group,
|
|
96
148
|
resource_id=resource_id,
|
|
97
149
|
)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def get_encrypted_json_storage(resource_id: int, resource_group: str = RESOURCE_GROUP.PREDICTOR):
|
|
153
|
+
return EncryptedJsonStorage(
|
|
154
|
+
resource_group=resource_group,
|
|
155
|
+
resource_id=resource_id,
|
|
156
|
+
)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
|
+
import json
|
|
3
4
|
import io
|
|
4
5
|
import zipfile
|
|
5
6
|
from typing import Union
|
|
@@ -7,7 +8,10 @@ from typing import Union
|
|
|
7
8
|
import mindsdb.interfaces.storage.db as db
|
|
8
9
|
|
|
9
10
|
from .fs import RESOURCE_GROUP, FileStorageFactory, SERVICE_FILES_NAMES
|
|
10
|
-
from .json import get_json_storage
|
|
11
|
+
from .json import get_json_storage, get_encrypted_json_storage
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
JSON_STORAGE_FILE = 'json_storage.json'
|
|
11
15
|
|
|
12
16
|
|
|
13
17
|
class ModelStorage:
|
|
@@ -119,6 +123,13 @@ class ModelStorage:
|
|
|
119
123
|
)
|
|
120
124
|
return json_storage.set(name, data)
|
|
121
125
|
|
|
126
|
+
def encrypted_json_set(self, name: str, data: dict) -> None:
|
|
127
|
+
json_storage = get_encrypted_json_storage(
|
|
128
|
+
resource_id=self.predictor_id,
|
|
129
|
+
resource_group=RESOURCE_GROUP.PREDICTOR
|
|
130
|
+
)
|
|
131
|
+
return json_storage.set(name, data)
|
|
132
|
+
|
|
122
133
|
def json_get(self, name):
|
|
123
134
|
json_storage = get_json_storage(
|
|
124
135
|
resource_id=self.predictor_id,
|
|
@@ -126,6 +137,13 @@ class ModelStorage:
|
|
|
126
137
|
)
|
|
127
138
|
return json_storage.get(name)
|
|
128
139
|
|
|
140
|
+
def encrypted_json_get(self, name: str) -> dict:
|
|
141
|
+
json_storage = get_encrypted_json_storage(
|
|
142
|
+
resource_id=self.predictor_id,
|
|
143
|
+
resource_group=RESOURCE_GROUP.PREDICTOR
|
|
144
|
+
)
|
|
145
|
+
return json_storage.get(name)
|
|
146
|
+
|
|
129
147
|
def json_list(self):
|
|
130
148
|
...
|
|
131
149
|
|
|
@@ -237,6 +255,13 @@ class HandlerStorage:
|
|
|
237
255
|
)
|
|
238
256
|
return json_storage.set(name, content)
|
|
239
257
|
|
|
258
|
+
def encrypted_json_set(self, name: str, content: dict) -> None:
|
|
259
|
+
json_storage = get_encrypted_json_storage(
|
|
260
|
+
resource_id=self.integration_id,
|
|
261
|
+
resource_group=RESOURCE_GROUP.INTEGRATION
|
|
262
|
+
)
|
|
263
|
+
return json_storage.set(name, content)
|
|
264
|
+
|
|
240
265
|
def json_get(self, name):
|
|
241
266
|
json_storage = get_json_storage(
|
|
242
267
|
resource_id=self.integration_id,
|
|
@@ -244,6 +269,13 @@ class HandlerStorage:
|
|
|
244
269
|
)
|
|
245
270
|
return json_storage.get(name)
|
|
246
271
|
|
|
272
|
+
def encrypted_json_get(self, name: str) -> dict:
|
|
273
|
+
json_storage = get_encrypted_json_storage(
|
|
274
|
+
resource_id=self.integration_id,
|
|
275
|
+
resource_group=RESOURCE_GROUP.INTEGRATION
|
|
276
|
+
)
|
|
277
|
+
return json_storage.get(name)
|
|
278
|
+
|
|
247
279
|
def json_list(self):
|
|
248
280
|
...
|
|
249
281
|
|
|
@@ -251,8 +283,11 @@ class HandlerStorage:
|
|
|
251
283
|
...
|
|
252
284
|
|
|
253
285
|
def export_files(self) -> bytes:
|
|
254
|
-
|
|
286
|
+
json_storage = self.export_json_storage()
|
|
287
|
+
|
|
288
|
+
if self.is_empty() and not json_storage:
|
|
255
289
|
return None
|
|
290
|
+
|
|
256
291
|
folder_path = self.folder_get('')
|
|
257
292
|
|
|
258
293
|
zip_fd = io.BytesIO()
|
|
@@ -265,6 +300,11 @@ class HandlerStorage:
|
|
|
265
300
|
abs_path = os.path.join(root, file_name)
|
|
266
301
|
zipf.write(abs_path, os.path.relpath(abs_path, folder_path))
|
|
267
302
|
|
|
303
|
+
# If JSON storage is not empty, add it to the zip file.
|
|
304
|
+
if json_storage:
|
|
305
|
+
json_str = json.dumps(json_storage)
|
|
306
|
+
zipf.writestr(JSON_STORAGE_FILE, json_str)
|
|
307
|
+
|
|
268
308
|
zip_fd.seek(0)
|
|
269
309
|
return zip_fd.read()
|
|
270
310
|
|
|
@@ -277,6 +317,48 @@ class HandlerStorage:
|
|
|
277
317
|
zip_fd.seek(0)
|
|
278
318
|
|
|
279
319
|
with zipfile.ZipFile(zip_fd, 'r') as zip_ref:
|
|
280
|
-
zip_ref.
|
|
320
|
+
for name in zip_ref.namelist():
|
|
321
|
+
# If JSON storage file is in the zip file, import the content to the JSON storage.
|
|
322
|
+
# Thereafter, remove the file from the folder.
|
|
323
|
+
if name == JSON_STORAGE_FILE:
|
|
324
|
+
json_storage = zip_ref.read(JSON_STORAGE_FILE)
|
|
325
|
+
self.import_json_storage(json_storage)
|
|
326
|
+
|
|
327
|
+
else:
|
|
328
|
+
zip_ref.extract(name, folder_path)
|
|
281
329
|
|
|
282
330
|
self.folder_sync('')
|
|
331
|
+
|
|
332
|
+
def export_json_storage(self) -> list[dict]:
|
|
333
|
+
json_storage = get_json_storage(
|
|
334
|
+
resource_id=self.integration_id,
|
|
335
|
+
resource_group=RESOURCE_GROUP.INTEGRATION
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
records = []
|
|
339
|
+
for record in json_storage.get_all_records():
|
|
340
|
+
record_dict = record.to_dict()
|
|
341
|
+
if record_dict.get('encrypted_content'):
|
|
342
|
+
record_dict['encrypted_content'] = record_dict['encrypted_content'].decode()
|
|
343
|
+
records.append(record_dict)
|
|
344
|
+
|
|
345
|
+
return records
|
|
346
|
+
|
|
347
|
+
def import_json_storage(self, records: bytes) -> None:
|
|
348
|
+
json_storage = get_json_storage(
|
|
349
|
+
resource_id=self.integration_id,
|
|
350
|
+
resource_group=RESOURCE_GROUP.INTEGRATION
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
encrypted_json_storage = get_encrypted_json_storage(
|
|
354
|
+
resource_id=self.integration_id,
|
|
355
|
+
resource_group=RESOURCE_GROUP.INTEGRATION
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
records = json.loads(records.decode())
|
|
359
|
+
|
|
360
|
+
for record in records:
|
|
361
|
+
if record['encrypted_content']:
|
|
362
|
+
encrypted_json_storage.set_str(record['name'], record['encrypted_content'])
|
|
363
|
+
else:
|
|
364
|
+
json_storage.set(record['name'], record['content'])
|
|
@@ -5,6 +5,7 @@ from mindsdb_sql_parser import parse_sql, ParsingException
|
|
|
5
5
|
from mindsdb.interfaces.storage import db
|
|
6
6
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
7
7
|
from mindsdb.utilities.context import context as ctx
|
|
8
|
+
from mindsdb.utilities.config import config
|
|
8
9
|
|
|
9
10
|
from mindsdb.api.executor.controllers.session_controller import SessionController
|
|
10
11
|
|
|
@@ -16,7 +17,7 @@ class TriggersController:
|
|
|
16
17
|
name = name.lower()
|
|
17
18
|
|
|
18
19
|
if project_name is None:
|
|
19
|
-
project_name = 'mindsdb'
|
|
20
|
+
project_name = config.get('default_project')
|
|
20
21
|
project_controller = ProjectController()
|
|
21
22
|
project = project_controller.get(name=project_name)
|
|
22
23
|
|
|
@@ -32,12 +32,26 @@ def upgrade():
|
|
|
32
32
|
sa.UniqueConstraint('name', 'company_id', name='unique_project_name_company_id')
|
|
33
33
|
)
|
|
34
34
|
|
|
35
|
+
project_table = sa.Table(
|
|
36
|
+
'project',
|
|
37
|
+
sa.MetaData(),
|
|
38
|
+
sa.Column('id', sa.Integer()),
|
|
39
|
+
sa.Column('name', sa.String()),
|
|
40
|
+
sa.Column('company_id', sa.Integer()),
|
|
41
|
+
)
|
|
42
|
+
|
|
35
43
|
conn = op.get_bind()
|
|
36
44
|
session = sa.orm.Session(bind=conn)
|
|
37
45
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
46
|
+
conn.execute(
|
|
47
|
+
project_table.insert().values(
|
|
48
|
+
name='mindsdb'
|
|
49
|
+
)
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
project_record = conn.execute(
|
|
53
|
+
project_table.select().where(project_table.c.name == 'mindsdb')
|
|
54
|
+
).fetchone()
|
|
41
55
|
|
|
42
56
|
with op.batch_alter_table('predictor', schema=None) as batch_op:
|
|
43
57
|
batch_op.add_column(sa.Column('project_id', sa.Integer()))
|