MindsDB 25.2.3.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (86) hide show
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +16 -11
  3. mindsdb/api/executor/command_executor.py +1 -1
  4. mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -2
  5. mindsdb/api/executor/planner/query_planner.py +6 -2
  6. mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -1
  7. mindsdb/api/http/initialize.py +8 -5
  8. mindsdb/api/http/namespaces/agents.py +0 -7
  9. mindsdb/api/http/namespaces/config.py +0 -48
  10. mindsdb/api/http/namespaces/knowledge_bases.py +1 -1
  11. mindsdb/api/http/namespaces/util.py +0 -28
  12. mindsdb/api/mongo/classes/query_sql.py +2 -1
  13. mindsdb/api/mongo/responders/aggregate.py +2 -2
  14. mindsdb/api/mongo/responders/coll_stats.py +3 -2
  15. mindsdb/api/mongo/responders/db_stats.py +2 -1
  16. mindsdb/api/mongo/responders/insert.py +4 -2
  17. mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +2 -1
  18. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +5 -4
  19. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +2 -4
  20. mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -1
  21. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  22. mindsdb/integrations/handlers/dspy_handler/requirements.txt +0 -1
  23. mindsdb/integrations/handlers/gmail_handler/connection_args.py +2 -2
  24. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +19 -66
  25. mindsdb/integrations/handlers/gmail_handler/requirements.txt +0 -1
  26. mindsdb/integrations/handlers/google_calendar_handler/connection_args.py +15 -0
  27. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +31 -41
  28. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +0 -2
  29. mindsdb/integrations/handlers/langchain_embedding_handler/requirements.txt +0 -1
  30. mindsdb/integrations/handlers/langchain_handler/requirements.txt +0 -1
  31. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +0 -1
  32. mindsdb/integrations/handlers/openai_handler/constants.py +3 -1
  33. mindsdb/integrations/handlers/openai_handler/requirements.txt +0 -1
  34. mindsdb/integrations/handlers/rag_handler/requirements.txt +0 -1
  35. mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +33 -8
  36. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +3 -2
  37. mindsdb/integrations/handlers/web_handler/web_handler.py +42 -33
  38. mindsdb/integrations/handlers/youtube_handler/__init__.py +2 -0
  39. mindsdb/integrations/handlers/youtube_handler/connection_args.py +32 -0
  40. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +2 -38
  41. mindsdb/integrations/libs/llm/utils.py +7 -1
  42. mindsdb/integrations/libs/process_cache.py +2 -2
  43. mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +29 -38
  44. mindsdb/integrations/utilities/pydantic_utils.py +208 -0
  45. mindsdb/integrations/utilities/rag/chains/local_context_summarizer_chain.py +227 -0
  46. mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -4
  47. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +800 -135
  48. mindsdb/integrations/utilities/rag/settings.py +390 -152
  49. mindsdb/integrations/utilities/sql_utils.py +2 -1
  50. mindsdb/interfaces/agents/agents_controller.py +14 -10
  51. mindsdb/interfaces/agents/callback_handlers.py +52 -5
  52. mindsdb/interfaces/agents/langchain_agent.py +5 -3
  53. mindsdb/interfaces/agents/mindsdb_chat_model.py +4 -2
  54. mindsdb/interfaces/chatbot/chatbot_controller.py +9 -8
  55. mindsdb/interfaces/database/database.py +3 -2
  56. mindsdb/interfaces/database/integrations.py +1 -1
  57. mindsdb/interfaces/database/projects.py +28 -2
  58. mindsdb/interfaces/jobs/jobs_controller.py +4 -1
  59. mindsdb/interfaces/jobs/scheduler.py +1 -1
  60. mindsdb/interfaces/knowledge_base/preprocessing/constants.py +2 -2
  61. mindsdb/interfaces/model/model_controller.py +5 -2
  62. mindsdb/interfaces/skills/retrieval_tool.py +128 -39
  63. mindsdb/interfaces/skills/skill_tool.py +7 -7
  64. mindsdb/interfaces/skills/skills_controller.py +10 -6
  65. mindsdb/interfaces/skills/sql_agent.py +6 -1
  66. mindsdb/interfaces/storage/db.py +14 -12
  67. mindsdb/interfaces/storage/json.py +59 -0
  68. mindsdb/interfaces/storage/model_fs.py +85 -3
  69. mindsdb/interfaces/triggers/triggers_controller.py +2 -1
  70. mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +17 -3
  71. mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py +33 -0
  72. mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +29 -0
  73. mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +41 -0
  74. mindsdb/utilities/config.py +6 -1
  75. mindsdb/utilities/functions.py +11 -0
  76. mindsdb/utilities/log.py +17 -2
  77. mindsdb/utilities/ml_task_queue/consumer.py +4 -2
  78. mindsdb/utilities/render/sqlalchemy_render.py +4 -0
  79. {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/METADATA +226 -247
  80. {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/RECORD +83 -80
  81. {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/WHEEL +1 -1
  82. mindsdb/integrations/handlers/gmail_handler/utils.py +0 -45
  83. mindsdb/utilities/log_controller.py +0 -39
  84. mindsdb/utilities/telemetry.py +0 -44
  85. {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/LICENSE +0 -0
  86. {MindsDB-25.2.3.0.dist-info → mindsdb-25.3.1.0.dist-info}/top_level.txt +0 -0
@@ -1,33 +1,24 @@
1
+ import traceback
2
+
1
3
  from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
2
4
  from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
5
+ from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
6
+ from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
3
7
 
4
8
  from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
5
9
  from mindsdb.interfaces.skills.skill_tool import skill_tool
6
10
  from mindsdb.interfaces.storage import db
7
11
  from mindsdb.interfaces.storage.db import KnowledgeBase
8
12
  from mindsdb.utilities import log
13
+ from langchain_core.documents import Document
9
14
  from langchain_core.tools import Tool
15
+ from mindsdb.integrations.libs.response import RESPONSE_TYPE
10
16
  from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
11
17
 
12
18
  logger = log.getLogger(__name__)
13
19
 
14
20
 
15
- def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
16
- """
17
- Builds a retrieval tool i.e RAG
18
-
19
- Args:
20
- tool: Tool configuration dictionary
21
- pred_args: Predictor arguments dictionary
22
- skill: Skills database object
23
-
24
- Returns:
25
- Tool: Configured retrieval tool
26
-
27
- Raises:
28
- ValueError: If knowledge base is not found or configuration is invalid
29
- """
30
- # build RAG config
21
+ def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
31
22
  tools_config = tool['config']
32
23
  tools_config.update(pred_args)
33
24
 
@@ -71,34 +62,132 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
71
62
  logger.debug("Using default embedding model as no knowledge base provided")
72
63
 
73
64
  # Load and validate config
74
- try:
75
- rag_config = load_rag_config(tools_config, kb_params, embeddings_model)
76
- # build retriever
77
- rag_pipeline = RAG(rag_config)
78
- logger.debug(f"RAG pipeline created with config: {rag_config}")
79
-
80
- def rag_wrapper(query: str) -> str:
81
- try:
82
- result = rag_pipeline(query)
83
- logger.debug(f"RAG pipeline result: {result}")
84
- return result['answer']
85
- except Exception as e:
86
- logger.error(f"Error in RAG pipeline: {str(e)}")
87
- return f"Error in retrieval: {str(e)}"
88
-
89
- # Create RAG tool
90
- return Tool(
91
- func=rag_wrapper,
92
- name=tool['name'],
93
- description=tool['description'],
94
- response_format='content',
95
- # Return directly by default since we already use an LLM against retrieved context to generate a response.
96
- return_direct=tools_config.get('return_direct', True)
65
+ return load_rag_config(tools_config, kb_params, embeddings_model)
66
+
67
+
68
def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
    """
    Builds the tool that answers a query by running the RAG pipeline

    Args:
        tool: Tool configuration dictionary
        pred_args: Predictor arguments dictionary
        skill: Skills database object

    Returns:
        Tool: Tool whose function returns the pipeline's 'answer', or an error string if the pipeline fails
    """
    pipeline_config = _load_rag_config(tool, pred_args, skill)
    pipeline = RAG(pipeline_config)
    logger.debug(f"RAG pipeline created with config: {pipeline_config}")

    def rag_wrapper(query: str) -> str:
        # Failures are reported back to the agent as text instead of being raised.
        try:
            pipeline_output = pipeline(query)
            logger.debug(f"RAG pipeline result: {pipeline_output}")
            return pipeline_output['answer']
        except Exception as e:
            logger.error(f"Error in RAG pipeline: {str(e)}")
            logger.error(traceback.format_exc())
            return f"Error in retrieval: {str(e)}"

    # Predictor arguments are merged into the tool's own config dict (in place).
    merged_config = tool['config']
    merged_config.update(pred_args)
    # Return directly by default since we already use an LLM against retrieved context to generate a response.
    return_direct = merged_config.get('return_direct', True)
    return Tool(
        func=rag_wrapper,
        name=tool['name'],
        description=tool['description'],
        response_format='content',
        return_direct=return_direct
    )
95
+
96
+
97
def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
    """
    Builds a tool that looks up a single document by name and returns its full text

    Args:
        tool: Tool configuration dictionary; the knowledge base name is read from 'source'
        pred_args: Predictor arguments dictionary
        skill: Skills database object

    Returns:
        Tool: Lookup tool named '<tool name>_name_lookup'

    Raises:
        ValueError: If the tool has no 'source' or the knowledge base is not found
    """
    if 'source' not in tool:
        raise ValueError("Knowledge base for tool not found")
    kb_name = tool['source']
    executor = skill_tool.get_command_executor()
    kb = _get_knowledge_base(kb_name, skill.project_id, executor)
    if not kb:
        raise ValueError(f"Knowledge base not found: {kb_name}")
    kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
    vector_db_handler = kb_table.get_vector_db()

    rag_config = _load_rag_config(tool, pred_args, skill)
    metadata_config = rag_config.metadata_config

    def _escape_literal(value: str) -> str:
        # Double single quotes so the value cannot close the SQL string literal it is embedded in.
        # NOTE(review): assumes the backend treats backslashes literally inside '...' strings
        # (PostgreSQL's default with standard_conforming_strings=on) - confirm for other backends.
        return value.replace("'", "''")

    def _get_document_by_name(name: str):
        # A blank name would turn the ILIKE pattern into '%%' and match an arbitrary row.
        if not name or not name.strip():
            return None

        if metadata_config.name_column_index is not None:
            # split() without arguments drops empty tokens produced by repeated whitespace,
            # which would otherwise yield an invalid tsquery such as 'a &  & b'.
            tsquery_str = _escape_literal(' & '.join(name.split()))
            query = f'SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery(\'{tsquery_str}\') LIMIT 1;'
        else:
            escaped_name = _escape_literal(name)
            query = f'SELECT * FROM {metadata_config.table} WHERE "{metadata_config.name_column}" ILIKE \'%{escaped_name}%\' LIMIT 1;'
        documents_response = vector_db_handler.native_query(query)

        if documents_response.resp_type == RESPONSE_TYPE.ERROR:
            raise RuntimeError(f'There was an error looking up documents: {documents_response.error_message}')
        if documents_response.data_frame.empty:
            return None
        document_row = documents_response.data_frame.head(1)

        # Restore document from chunks, keeping in mind max context.
        id_filter_condition = FilterCondition(
            f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
            FilterOperator.EQUAL,
            str(document_row.get(metadata_config.id_column).item())
        )
        document_chunks_df = vector_db_handler.select(
            metadata_config.embeddings_table,
            conditions=[id_filter_condition]
        )
        if document_chunks_df.empty:
            return None
        sort_col = 'chunk_id' if 'chunk_id' in document_chunks_df.columns else 'id'
        # sort_values returns a new DataFrame; the result must be kept or the chunks stay unsorted.
        document_chunks_df = document_chunks_df.sort_values(by=sort_col)
        content = ''
        for _, chunk in document_chunks_df.iterrows():
            # Stop adding chunks once the accumulated text has exceeded the configured limit.
            if len(content) > metadata_config.max_document_context:
                break
            content += chunk.get(metadata_config.content_column, '')

        return Document(
            page_content=content,
            metadata=document_row.to_dict(orient='records')[0]
        )

    def _lookup_document_by_name(name: str):
        found_document = _get_document_by_name(name)
        if found_document is None:
            return f'I could not find any document with name {name}. Please make sure the document name matches exactly.'
        return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"

    return Tool(
        func=_lookup_document_by_name,
        name=tool.get('name', '') + '_name_lookup',
        description='You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.',
        return_direct=False
    )
98
163
 
164
+
165
def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills) -> list:
    """
    Builds a list of tools for retrieval i.e RAG

    Args:
        tool: Tool configuration dictionary
        pred_args: Predictor arguments dictionary
        skill: Skills database object

    Returns:
        list: The RAG pipeline tool, followed by a document name lookup tool
            when the RAG config defines a metadata_config

    Raises:
        ValueError: If knowledge base is not found or configuration is invalid
    """
    # Catch configuration errors before creating tools.
    try:
        rag_config = _load_rag_config(tool, pred_args, skill)
    except Exception as e:
        logger.error(f"Error building RAG pipeline: {str(e)}")
        # Chain the original exception so its traceback is not lost.
        raise ValueError(f"Failed to build RAG pipeline: {str(e)}") from e

    tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
    if rag_config.metadata_config is not None:
        tools.append(_build_name_lookup_tool(tool, pred_args, skill))
    return tools
102
191
 
103
192
 
104
193
  def _get_knowledge_base(knowledge_base_name: str, project_id, executor) -> KnowledgeBase:
@@ -10,6 +10,7 @@ from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant
10
10
 
11
11
  from mindsdb.utilities import log
12
12
  from mindsdb.utilities.cache import get_cache
13
+ from mindsdb.utilities.config import config
13
14
  from mindsdb.interfaces.storage import db
14
15
  from mindsdb.interfaces.skills.sql_agent import SQLAgent
15
16
  from mindsdb.integrations.libs.vectordatabase_handler import TableField
@@ -106,7 +107,7 @@ class SkillToolController:
106
107
  from mindsdb.api.executor.controllers import SessionController # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304) # noqa
107
108
 
108
109
  sql_session = SessionController()
109
- sql_session.database = 'mindsdb'
110
+ sql_session.database = config.get('default_project')
110
111
 
111
112
  self.command_executor = ExecuteCommands(sql_session)
112
113
  return self.command_executor
@@ -222,8 +223,8 @@ class SkillToolController:
222
223
  pred_args = {}
223
224
  pred_args['llm'] = llm
224
225
 
225
- from .retrieval_tool import build_retrieval_tool
226
- return build_retrieval_tool(tool, pred_args, skill)
226
+ from .retrieval_tool import build_retrieval_tools
227
+ return build_retrieval_tools(tool, pred_args, skill)
227
228
 
228
229
  def _get_rag_query_function(self, skill: db.Skills):
229
230
  session_controller = self.get_command_executor().session
@@ -295,10 +296,9 @@ class SkillToolController:
295
296
  for skill in skills
296
297
  ]
297
298
  elif skill_type == SkillType.RETRIEVAL:
298
- tools[skill_type] = [
299
- self._make_retrieval_tools(skill, llm, embedding_model)
300
- for skill in skills
301
- ]
299
+ tools[skill_type] = []
300
+ for skill in skills:
301
+ tools[skill_type] += self._make_retrieval_tools(skill, llm, embedding_model)
302
302
  return tools
303
303
 
304
304
 
@@ -1,11 +1,15 @@
1
1
  import datetime
2
2
  from typing import Dict, List, Optional
3
3
 
4
- from sqlalchemy import null
4
+ from sqlalchemy import null, func
5
5
  from sqlalchemy.orm.attributes import flag_modified
6
6
 
7
7
  from mindsdb.interfaces.storage import db
8
8
  from mindsdb.interfaces.database.projects import ProjectController
9
+ from mindsdb.utilities.config import config
10
+
11
+
12
+ default_project = config.get('default_project')
9
13
 
10
14
 
11
15
  class SkillsController:
@@ -16,7 +20,7 @@ class SkillsController:
16
20
  project_controller = ProjectController()
17
21
  self.project_controller = project_controller
18
22
 
19
- def get_skill(self, skill_name: str, project_name: str = 'mindsdb') -> Optional[db.Skills]:
23
+ def get_skill(self, skill_name: str, project_name: str = default_project) -> Optional[db.Skills]:
20
24
  '''
21
25
  Gets a skill by name. Skills are expected to have unique names.
22
26
 
@@ -33,7 +37,7 @@ class SkillsController:
33
37
 
34
38
  project = self.project_controller.get(name=project_name)
35
39
  return db.Skills.query.filter(
36
- db.Skills.name == skill_name,
40
+ func.lower(db.Skills.name) == func.lower(skill_name),
37
41
  db.Skills.project_id == project.id,
38
42
  db.Skills.deleted_at == null()
39
43
  ).first()
@@ -90,7 +94,7 @@ class SkillsController:
90
94
  ValueError: If `project_name` does not exist or skill already exists
91
95
  '''
92
96
  if project_name is None:
93
- project_name = 'mindsdb'
97
+ project_name = default_project
94
98
  project = self.project_controller.get(name=project_name)
95
99
 
96
100
  skill = self.get_skill(name, project_name)
@@ -113,7 +117,7 @@ class SkillsController:
113
117
  self,
114
118
  skill_name: str,
115
119
  new_name: str = None,
116
- project_name: str = 'mindsdb',
120
+ project_name: str = default_project,
117
121
  type: str = None,
118
122
  params: Dict[str, str] = None):
119
123
  '''
@@ -158,7 +162,7 @@ class SkillsController:
158
162
 
159
163
  return existing_skill
160
164
 
161
- def delete_skill(self, skill_name: str, project_name: str = 'mindsdb'):
165
+ def delete_skill(self, skill_name: str, project_name: str = default_project):
162
166
  '''
163
167
  Deletes a skill by name.
164
168
 
@@ -287,6 +287,7 @@ class SQLAgent:
287
287
  return info
288
288
 
289
289
  def _get_sample_rows(self, table: str, fields: List[str]) -> str:
290
+ logger.info(f'_get_sample_rows: table={table} fields={fields}')
290
291
  command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
291
292
  try:
292
293
  ret = self._call_engine(command)
@@ -300,7 +301,7 @@ class SQLAgent:
300
301
  map(lambda row: [truncate_value(value) for value in row], sample_rows))
301
302
  sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
302
303
  except Exception as e:
303
- logger.warning(e)
304
+ logger.info(f'_get_sample_rows error: {e}')
304
305
  sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
305
306
 
306
307
  return sample_rows_str
@@ -347,14 +348,18 @@ class SQLAgent:
347
348
 
348
349
  def get_table_info_safe(self, table_names: Optional[List[str]] = None) -> str:
349
350
  try:
351
+ logger.info(f'get_table_info_safe: {table_names}')
350
352
  return self.get_table_info(table_names)
351
353
  except Exception as e:
354
+ logger.info(f'get_table_info_safe error: {e}')
352
355
  return f"Error: {e}"
353
356
 
354
357
  def query_safe(self, command: str, fetch: str = "all") -> str:
355
358
  try:
359
+ logger.info(f'query_safe (fetch={fetch}): {command}')
356
360
  return self.query(command, fetch)
357
361
  except Exception as e:
362
+ logger.info(f'query_safe error: {e}')
358
363
  msg = f"Error: {e}"
359
364
  if 'does not exist' in msg and ' relation ' in msg:
360
365
  msg += '\nAvailable tables: ' + ', '.join(self.get_usable_table_names())
@@ -10,6 +10,7 @@ from sqlalchemy import (
10
10
  DateTime,
11
11
  Index,
12
12
  Integer,
13
+ LargeBinary,
13
14
  Numeric,
14
15
  String,
15
16
  UniqueConstraint,
@@ -213,23 +214,12 @@ class Project(Base):
213
214
  deleted_at = Column(DateTime)
214
215
  name = Column(String, nullable=False)
215
216
  company_id = Column(Integer, default=0)
217
+ metadata_: dict = Column("metadata", JSON, nullable=True)
216
218
  __table_args__ = (
217
219
  UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
218
220
  )
219
221
 
220
222
 
221
- class Log(Base):
222
- __tablename__ = "log"
223
-
224
- id = Column(Integer, primary_key=True)
225
- created_at = Column(DateTime, default=datetime.datetime.now)
226
- log_type = Column(String) # log, info, warning, traceback etc
227
- source = Column(String) # file + line
228
- company_id = Column(Integer)
229
- payload = Column(String)
230
- created_at_index = Index("some_index", "created_at_index")
231
-
232
-
233
223
  class Integration(Base):
234
224
  __tablename__ = "integration"
235
225
  id = Column(Integer, primary_key=True)
@@ -288,8 +278,20 @@ class JsonStorage(Base):
288
278
  resource_id = Column(Integer)
289
279
  name = Column(String)
290
280
  content = Column(JSON)
281
+ encrypted_content = Column(LargeBinary, nullable=True)
291
282
  company_id = Column(Integer)
292
283
 
284
+ def to_dict(self) -> Dict:
285
+ return {
286
+ "id": self.id,
287
+ "resource_group": self.resource_group,
288
+ "resource_id": self.resource_id,
289
+ "name": self.name,
290
+ "content": self.content,
291
+ "encrypted_content": self.encrypted_content,
292
+ "company_id": self.company_id,
293
+ }
294
+
293
295
 
294
296
  class Jobs(Base):
295
297
  __tablename__ = "jobs"
@@ -1,3 +1,5 @@
1
+ from mindsdb.utilities.functions import decrypt_json, encrypt_json
2
+ from mindsdb.utilities.config import config
1
3
  from mindsdb.interfaces.storage import db
2
4
  from mindsdb.interfaces.storage.fs import RESOURCE_GROUP
3
5
  from mindsdb.utilities.context import context as ctx
@@ -90,8 +92,65 @@ class JsonStorage:
90
92
  logger.error('cant delete records from JSON storage')
91
93
 
92
94
 
95
class EncryptedJsonStorage(JsonStorage):
    """JSON storage variant that keeps values encrypted in the ``encrypted_content`` column."""

    def __init__(self, resource_group: str, resource_id: int):
        super().__init__(resource_group, resource_id)
        # NOTE(review): when 'secret_key' is not configured this falls back to the fixed
        # string 'dummy-key', so stored values are only as secret as that constant.
        self.secret_key = config.get('secret_key', 'dummy-key')

    def __setitem__(self, key: str, value: dict) -> None:
        """Encrypt ``value`` with the storage key and store it under ``key``.

        Raises:
            TypeError: If ``value`` is not a dict.
        """
        if not isinstance(value, dict):
            raise TypeError(f"got {type(value)} instead of dict")

        self.set_bytes(key, encrypt_json(value, self.secret_key))

    def set_bytes(self, key: str, encrypted_value: bytes) -> None:
        """Store an already-encrypted payload under ``key``, creating or updating the record."""
        existing_record = self.get_record(key)
        if existing_record is None:
            record = db.JsonStorage(
                name=key,
                resource_group=self.resource_group,
                resource_id=self.resource_id,
                company_id=ctx.company_id,
                encrypted_content=encrypted_value
            )
            db.session.add(record)
        else:
            existing_record.encrypted_content = encrypted_value
        db.session.commit()

    def set_str(self, key: str, encrypted_value: str) -> None:
        """Store an already-encrypted payload given as text; it is encoded to bytes first."""
        self.set_bytes(key, encrypted_value.encode())

    def __getitem__(self, key: str) -> dict:
        """Return the decrypted value stored under ``key``, or None when there is nothing to decrypt."""
        record = self.get_record(key)
        # A record may exist without an encrypted payload (the column is nullable);
        # treat that like a missing record instead of handing None to decrypt_json.
        if record is None or record.encrypted_content is None:
            return None
        return decrypt_json(record.encrypted_content, self.secret_key)
143
+
144
+
93
145
def get_json_storage(resource_id: int, resource_group: str = RESOURCE_GROUP.PREDICTOR):
    """Create a JsonStorage bound to the given resource id and resource group."""
    storage = JsonStorage(resource_group=resource_group, resource_id=resource_id)
    return storage
150
+
151
+
152
def get_encrypted_json_storage(resource_id: int, resource_group: str = RESOURCE_GROUP.PREDICTOR):
    """Create an EncryptedJsonStorage bound to the given resource id and resource group."""
    storage = EncryptedJsonStorage(resource_group=resource_group, resource_id=resource_id)
    return storage
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import re
3
+ import json
3
4
  import io
4
5
  import zipfile
5
6
  from typing import Union
@@ -7,7 +8,10 @@ from typing import Union
7
8
  import mindsdb.interfaces.storage.db as db
8
9
 
9
10
  from .fs import RESOURCE_GROUP, FileStorageFactory, SERVICE_FILES_NAMES
10
- from .json import get_json_storage
11
+ from .json import get_json_storage, get_encrypted_json_storage
12
+
13
+
14
+ JSON_STORAGE_FILE = 'json_storage.json'
11
15
 
12
16
 
13
17
  class ModelStorage:
@@ -119,6 +123,13 @@ class ModelStorage:
119
123
  )
120
124
  return json_storage.set(name, data)
121
125
 
126
+ def encrypted_json_set(self, name: str, data: dict) -> None:
127
+ json_storage = get_encrypted_json_storage(
128
+ resource_id=self.predictor_id,
129
+ resource_group=RESOURCE_GROUP.PREDICTOR
130
+ )
131
+ return json_storage.set(name, data)
132
+
122
133
  def json_get(self, name):
123
134
  json_storage = get_json_storage(
124
135
  resource_id=self.predictor_id,
@@ -126,6 +137,13 @@ class ModelStorage:
126
137
  )
127
138
  return json_storage.get(name)
128
139
 
140
+ def encrypted_json_get(self, name: str) -> dict:
141
+ json_storage = get_encrypted_json_storage(
142
+ resource_id=self.predictor_id,
143
+ resource_group=RESOURCE_GROUP.PREDICTOR
144
+ )
145
+ return json_storage.get(name)
146
+
129
147
  def json_list(self):
130
148
  ...
131
149
 
@@ -237,6 +255,13 @@ class HandlerStorage:
237
255
  )
238
256
  return json_storage.set(name, content)
239
257
 
258
+ def encrypted_json_set(self, name: str, content: dict) -> None:
259
+ json_storage = get_encrypted_json_storage(
260
+ resource_id=self.integration_id,
261
+ resource_group=RESOURCE_GROUP.INTEGRATION
262
+ )
263
+ return json_storage.set(name, content)
264
+
240
265
  def json_get(self, name):
241
266
  json_storage = get_json_storage(
242
267
  resource_id=self.integration_id,
@@ -244,6 +269,13 @@ class HandlerStorage:
244
269
  )
245
270
  return json_storage.get(name)
246
271
 
272
+ def encrypted_json_get(self, name: str) -> dict:
273
+ json_storage = get_encrypted_json_storage(
274
+ resource_id=self.integration_id,
275
+ resource_group=RESOURCE_GROUP.INTEGRATION
276
+ )
277
+ return json_storage.get(name)
278
+
247
279
  def json_list(self):
248
280
  ...
249
281
 
@@ -251,8 +283,11 @@ class HandlerStorage:
251
283
  ...
252
284
 
253
285
  def export_files(self) -> bytes:
254
- if self.is_empty():
286
+ json_storage = self.export_json_storage()
287
+
288
+ if self.is_empty() and not json_storage:
255
289
  return None
290
+
256
291
  folder_path = self.folder_get('')
257
292
 
258
293
  zip_fd = io.BytesIO()
@@ -265,6 +300,11 @@ class HandlerStorage:
265
300
  abs_path = os.path.join(root, file_name)
266
301
  zipf.write(abs_path, os.path.relpath(abs_path, folder_path))
267
302
 
303
+ # If JSON storage is not empty, add it to the zip file.
304
+ if json_storage:
305
+ json_str = json.dumps(json_storage)
306
+ zipf.writestr(JSON_STORAGE_FILE, json_str)
307
+
268
308
  zip_fd.seek(0)
269
309
  return zip_fd.read()
270
310
 
@@ -277,6 +317,48 @@ class HandlerStorage:
277
317
  zip_fd.seek(0)
278
318
 
279
319
  with zipfile.ZipFile(zip_fd, 'r') as zip_ref:
280
- zip_ref.extractall(folder_path)
320
+ for name in zip_ref.namelist():
321
+ # If JSON storage file is in the zip file, import the content to the JSON storage.
322
+ # Thereafter, remove the file from the folder.
323
+ if name == JSON_STORAGE_FILE:
324
+ json_storage = zip_ref.read(JSON_STORAGE_FILE)
325
+ self.import_json_storage(json_storage)
326
+
327
+ else:
328
+ zip_ref.extract(name, folder_path)
281
329
 
282
330
  self.folder_sync('')
331
+
332
+ def export_json_storage(self) -> list[dict]:
333
+ json_storage = get_json_storage(
334
+ resource_id=self.integration_id,
335
+ resource_group=RESOURCE_GROUP.INTEGRATION
336
+ )
337
+
338
+ records = []
339
+ for record in json_storage.get_all_records():
340
+ record_dict = record.to_dict()
341
+ if record_dict.get('encrypted_content'):
342
+ record_dict['encrypted_content'] = record_dict['encrypted_content'].decode()
343
+ records.append(record_dict)
344
+
345
+ return records
346
+
347
+ def import_json_storage(self, records: bytes) -> None:
348
+ json_storage = get_json_storage(
349
+ resource_id=self.integration_id,
350
+ resource_group=RESOURCE_GROUP.INTEGRATION
351
+ )
352
+
353
+ encrypted_json_storage = get_encrypted_json_storage(
354
+ resource_id=self.integration_id,
355
+ resource_group=RESOURCE_GROUP.INTEGRATION
356
+ )
357
+
358
+ records = json.loads(records.decode())
359
+
360
+ for record in records:
361
+ if record['encrypted_content']:
362
+ encrypted_json_storage.set_str(record['name'], record['encrypted_content'])
363
+ else:
364
+ json_storage.set(record['name'], record['content'])
@@ -5,6 +5,7 @@ from mindsdb_sql_parser import parse_sql, ParsingException
5
5
  from mindsdb.interfaces.storage import db
6
6
  from mindsdb.interfaces.database.projects import ProjectController
7
7
  from mindsdb.utilities.context import context as ctx
8
+ from mindsdb.utilities.config import config
8
9
 
9
10
  from mindsdb.api.executor.controllers.session_controller import SessionController
10
11
 
@@ -16,7 +17,7 @@ class TriggersController:
16
17
  name = name.lower()
17
18
 
18
19
  if project_name is None:
19
- project_name = 'mindsdb'
20
+ project_name = config.get('default_project')
20
21
  project_controller = ProjectController()
21
22
  project = project_controller.get(name=project_name)
22
23
 
@@ -32,12 +32,26 @@ def upgrade():
32
32
  sa.UniqueConstraint('name', 'company_id', name='unique_project_name_company_id')
33
33
  )
34
34
 
35
+ project_table = sa.Table(
36
+ 'project',
37
+ sa.MetaData(),
38
+ sa.Column('id', sa.Integer()),
39
+ sa.Column('name', sa.String()),
40
+ sa.Column('company_id', sa.Integer()),
41
+ )
42
+
35
43
  conn = op.get_bind()
36
44
  session = sa.orm.Session(bind=conn)
37
45
 
38
- project_record = db.Project(name='mindsdb')
39
- session.add(project_record)
40
- session.commit()
46
+ conn.execute(
47
+ project_table.insert().values(
48
+ name='mindsdb'
49
+ )
50
+ )
51
+
52
+ project_record = conn.execute(
53
+ project_table.select().where(project_table.c.name == 'mindsdb')
54
+ ).fetchone()
41
55
 
42
56
  with op.batch_alter_table('predictor', schema=None) as batch_op:
43
57
  batch_op.add_column(sa.Column('project_id', sa.Integer()))