MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (164)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +40 -29
  3. mindsdb/api/a2a/__init__.py +1 -1
  4. mindsdb/api/a2a/agent.py +16 -10
  5. mindsdb/api/a2a/common/server/server.py +7 -3
  6. mindsdb/api/a2a/common/server/task_manager.py +12 -5
  7. mindsdb/api/a2a/common/types.py +66 -0
  8. mindsdb/api/a2a/task_manager.py +65 -17
  9. mindsdb/api/common/middleware.py +10 -12
  10. mindsdb/api/executor/command_executor.py +51 -40
  11. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  12. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
  13. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
  14. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  15. mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
  16. mindsdb/api/executor/exceptions.py +29 -10
  17. mindsdb/api/executor/planner/plan_join.py +17 -3
  18. mindsdb/api/executor/planner/query_prepare.py +2 -20
  19. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  20. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  21. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  22. mindsdb/api/executor/utilities/functions.py +6 -6
  23. mindsdb/api/executor/utilities/sql.py +37 -20
  24. mindsdb/api/http/gui.py +5 -11
  25. mindsdb/api/http/initialize.py +75 -61
  26. mindsdb/api/http/namespaces/agents.py +10 -15
  27. mindsdb/api/http/namespaces/analysis.py +13 -20
  28. mindsdb/api/http/namespaces/auth.py +1 -1
  29. mindsdb/api/http/namespaces/chatbots.py +0 -5
  30. mindsdb/api/http/namespaces/config.py +15 -11
  31. mindsdb/api/http/namespaces/databases.py +140 -201
  32. mindsdb/api/http/namespaces/file.py +17 -4
  33. mindsdb/api/http/namespaces/handlers.py +17 -7
  34. mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
  35. mindsdb/api/http/namespaces/models.py +94 -126
  36. mindsdb/api/http/namespaces/projects.py +13 -22
  37. mindsdb/api/http/namespaces/sql.py +33 -25
  38. mindsdb/api/http/namespaces/tab.py +27 -37
  39. mindsdb/api/http/namespaces/views.py +1 -1
  40. mindsdb/api/http/start.py +16 -10
  41. mindsdb/api/mcp/__init__.py +2 -1
  42. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  43. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  44. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  45. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
  46. mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
  47. mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
  48. mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
  49. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  50. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
  51. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  52. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
  53. mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
  54. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
  55. mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
  56. mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
  57. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
  58. mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
  59. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
  60. mindsdb/integrations/handlers/shopify_handler/requirements.txt +1 -0
  61. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +80 -13
  62. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  63. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  64. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  65. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  66. mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
  67. mindsdb/integrations/libs/api_handler.py +10 -10
  68. mindsdb/integrations/libs/base.py +4 -4
  69. mindsdb/integrations/libs/llm/utils.py +2 -2
  70. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  71. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  72. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  73. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  74. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  75. mindsdb/integrations/libs/process_cache.py +132 -140
  76. mindsdb/integrations/libs/response.py +18 -12
  77. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  78. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  79. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
  80. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
  81. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  82. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
  83. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  84. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  85. mindsdb/integrations/utilities/rag/settings.py +58 -133
  86. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  87. mindsdb/interfaces/agents/agents_controller.py +2 -3
  88. mindsdb/interfaces/agents/constants.py +0 -2
  89. mindsdb/interfaces/agents/litellm_server.py +34 -58
  90. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  91. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  92. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  93. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  94. mindsdb/interfaces/chatbot/polling.py +30 -18
  95. mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
  96. mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
  97. mindsdb/interfaces/database/data_handlers_cache.py +190 -0
  98. mindsdb/interfaces/database/database.py +3 -3
  99. mindsdb/interfaces/database/integrations.py +7 -110
  100. mindsdb/interfaces/database/projects.py +2 -6
  101. mindsdb/interfaces/database/views.py +1 -4
  102. mindsdb/interfaces/file/file_controller.py +6 -6
  103. mindsdb/interfaces/functions/controller.py +1 -1
  104. mindsdb/interfaces/functions/to_markdown.py +2 -2
  105. mindsdb/interfaces/jobs/jobs_controller.py +5 -9
  106. mindsdb/interfaces/jobs/scheduler.py +3 -9
  107. mindsdb/interfaces/knowledge_base/controller.py +244 -128
  108. mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
  109. mindsdb/interfaces/knowledge_base/executor.py +11 -0
  110. mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
  111. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  112. mindsdb/interfaces/model/model_controller.py +172 -168
  113. mindsdb/interfaces/query_context/context_controller.py +14 -2
  114. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
  115. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  116. mindsdb/interfaces/skills/skill_tool.py +2 -2
  117. mindsdb/interfaces/skills/skills_controller.py +1 -4
  118. mindsdb/interfaces/skills/sql_agent.py +25 -19
  119. mindsdb/interfaces/storage/db.py +16 -6
  120. mindsdb/interfaces/storage/fs.py +114 -169
  121. mindsdb/interfaces/storage/json.py +19 -18
  122. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  123. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  124. mindsdb/interfaces/tasks/task_thread.py +7 -9
  125. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  126. mindsdb/interfaces/triggers/triggers_controller.py +47 -52
  127. mindsdb/migrations/migrate.py +16 -16
  128. mindsdb/utilities/api_status.py +58 -0
  129. mindsdb/utilities/config.py +68 -2
  130. mindsdb/utilities/exception.py +40 -1
  131. mindsdb/utilities/fs.py +0 -1
  132. mindsdb/utilities/hooks/profiling.py +17 -14
  133. mindsdb/utilities/json_encoder.py +24 -10
  134. mindsdb/utilities/langfuse.py +40 -45
  135. mindsdb/utilities/log.py +272 -0
  136. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  137. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  138. mindsdb/utilities/render/sqlalchemy_render.py +22 -20
  139. mindsdb/utilities/starters.py +0 -10
  140. mindsdb/utilities/utils.py +2 -2
  141. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/METADATA +286 -267
  142. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/RECORD +145 -159
  143. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  144. mindsdb/api/postgres/__init__.py +0 -0
  145. mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
  146. mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
  147. mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
  148. mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
  149. mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
  150. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
  151. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
  152. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
  153. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
  154. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
  155. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
  156. mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
  157. mindsdb/api/postgres/start.py +0 -11
  158. mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
  159. mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
  160. mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
  161. mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
  162. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/WHEEL +0 -0
  163. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/licenses/LICENSE +0 -0
  164. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/top_level.txt +0 -0
@@ -11,14 +11,14 @@ from dataclasses import dataclass
11
11
  from datetime import datetime
12
12
  import threading
13
13
 
14
- if os.name == 'posix':
14
+ if os.name == "posix":
15
15
  import fcntl
16
16
 
17
17
  import psutil
18
18
 
19
19
  from mindsdb.utilities.config import Config
20
20
 
21
- if Config()['permanent_storage']['location'] == 's3':
21
+ if Config()["permanent_storage"]["location"] == "s3":
22
22
  import boto3
23
23
  from botocore.exceptions import ClientError as S3ClientError
24
24
  else:
@@ -34,17 +34,17 @@ logger = log.getLogger(__name__)
34
34
 
35
35
  @dataclass(frozen=True)
36
36
  class RESOURCE_GROUP:
37
- PREDICTOR = 'predictor'
38
- INTEGRATION = 'integration'
39
- TAB = 'tab'
40
- SYSTEM = 'system'
37
+ PREDICTOR = "predictor"
38
+ INTEGRATION = "integration"
39
+ TAB = "tab"
40
+ SYSTEM = "system"
41
41
 
42
42
 
43
43
  RESOURCE_GROUP = RESOURCE_GROUP()
44
44
 
45
45
 
46
- DIR_LOCK_FILE_NAME = 'dir.lock'
47
- DIR_LAST_MODIFIED_FILE_NAME = 'last_modified.txt'
46
+ DIR_LOCK_FILE_NAME = "dir.lock"
47
+ DIR_LAST_MODIFIED_FILE_NAME = "last_modified.txt"
48
48
  SERVICE_FILES_NAMES = (DIR_LOCK_FILE_NAME, DIR_LAST_MODIFIED_FILE_NAME)
49
49
 
50
50
 
@@ -88,7 +88,7 @@ def copy(src, dst):
88
88
  shutil.copytree(src, dst, dirs_exist_ok=True)
89
89
  else:
90
90
  if os.path.exists(dst):
91
- if hashlib.md5(open(src, 'rb').read()).hexdigest() == hashlib.md5(open(dst, 'rb').read()).hexdigest():
91
+ if hashlib.md5(open(src, "rb").read()).hexdigest() == hashlib.md5(open(dst, "rb").read()).hexdigest():
92
92
  return
93
93
  try:
94
94
  os.remove(dst)
@@ -98,12 +98,11 @@ def copy(src, dst):
98
98
 
99
99
 
100
100
  class BaseFSStore(ABC):
101
- """Base class for file storage
102
- """
101
+ """Base class for file storage"""
103
102
 
104
103
  def __init__(self):
105
104
  self.config = Config()
106
- self.storage = self.config['paths']['storage']
105
+ self.storage = self.config["paths"]["storage"]
107
106
 
108
107
  @abstractmethod
109
108
  def get(self, local_name, base_dir):
@@ -147,8 +146,8 @@ def get_dir_size(path: str):
147
146
 
148
147
 
149
148
  class AbsentFSStore(BaseFSStore):
150
- """Storage class that does not store anything. It is just a dummy.
151
- """
149
+ """Storage class that does not store anything. It is just a dummy."""
150
+
152
151
  def get(self, *args, **kwargs):
153
152
  pass
154
153
 
@@ -160,8 +159,7 @@ class AbsentFSStore(BaseFSStore):
160
159
 
161
160
 
162
161
  class LocalFSStore(BaseFSStore):
163
- """Storage that stores files locally
164
- """
162
+ """Storage that stores files locally"""
165
163
 
166
164
  def __init__(self):
167
165
  super().__init__()
@@ -175,10 +173,7 @@ class LocalFSStore(BaseFSStore):
175
173
 
176
174
  def put(self, local_name, base_dir, compression_level=9):
177
175
  remote_name = local_name
178
- copy(
179
- os.path.join(base_dir, local_name),
180
- os.path.join(self.storage, remote_name)
181
- )
176
+ copy(os.path.join(base_dir, local_name), os.path.join(self.storage, remote_name))
182
177
 
183
178
  def delete(self, remote_name):
184
179
  path = Path(self.storage).joinpath(remote_name)
@@ -192,44 +187,44 @@ class LocalFSStore(BaseFSStore):
192
187
 
193
188
 
194
189
  class FileLock:
195
- """ file lock to make safe concurrent access to directory
196
- works as context
190
+ """file lock to make safe concurrent access to directory
191
+ works as context
197
192
  """
198
193
 
199
194
  @staticmethod
200
195
  def lock_folder_path(relative_path: Path) -> Path:
201
- """ Args:
202
- relative_path (Path): path to resource directory relative to storage root
196
+ """Args:
197
+ relative_path (Path): path to resource directory relative to storage root
203
198
 
204
- Returns:
205
- Path: abs path to folder with lock file
199
+ Returns:
200
+ Path: abs path to folder with lock file
206
201
  """
207
202
  config = Config()
208
- root_storage_path = Path(config.paths['root'])
209
- return config.paths['locks'] / relative_path.relative_to(root_storage_path)
203
+ root_storage_path = Path(config.paths["root"])
204
+ return config.paths["locks"] / relative_path.relative_to(root_storage_path)
210
205
 
211
- def __init__(self, relative_path: Path, mode: str = 'w'):
212
- """ Args:
213
- relative_path (Path): path to resource directory relative to storage root
214
- mode (str): lock for read (r) or write (w)
206
+ def __init__(self, relative_path: Path, mode: str = "w"):
207
+ """Args:
208
+ relative_path (Path): path to resource directory relative to storage root
209
+ mode (str): lock for read (r) or write (w)
215
210
  """
216
- if os.name != 'posix':
211
+ if os.name != "posix":
217
212
  return
218
213
 
219
214
  self._local_path = FileLock.lock_folder_path(relative_path)
220
215
  self._lock_file_name = DIR_LOCK_FILE_NAME
221
216
  self._lock_file_path = self._local_path / self._lock_file_name
222
- self._mode = fcntl.LOCK_EX if mode == 'w' else fcntl.LOCK_SH
217
+ self._mode = fcntl.LOCK_EX if mode == "w" else fcntl.LOCK_SH
223
218
 
224
219
  if self._lock_file_path.is_file() is False:
225
220
  self._local_path.mkdir(parents=True, exist_ok=True)
226
221
  try:
227
- self._lock_file_path.write_text('')
222
+ self._lock_file_path.write_text("")
228
223
  except Exception:
229
224
  pass
230
225
 
231
226
  def __enter__(self):
232
- if os.name != 'posix':
227
+ if os.name != "posix":
233
228
  return
234
229
 
235
230
  try:
@@ -239,14 +234,14 @@ class FileLock:
239
234
  fcntl.lockf(self._lock_fd, self._mode | fcntl.LOCK_NB)
240
235
  except (ValueError, FileNotFoundError):
241
236
  # file probably was deleted between open and lock
242
- logger.error(f'Cant accure lock on {self._local_path}')
237
+ logger.error(f"Cant accure lock on {self._local_path}")
243
238
  raise FileNotFoundError
244
239
  except BlockingIOError:
245
- logger.error(f'Directory is locked by another process: {self._local_path}')
240
+ logger.error(f"Directory is locked by another process: {self._local_path}")
246
241
  fcntl.lockf(self._lock_fd, self._mode)
247
242
 
248
243
  def __exit__(self, exc_type, exc_value, traceback):
249
- if os.name != 'posix':
244
+ if os.name != "posix":
250
245
  return
251
246
 
252
247
  try:
@@ -257,47 +252,44 @@ class FileLock:
257
252
 
258
253
 
259
254
  class S3FSStore(BaseFSStore):
260
- """Storage that stores files in amazon s3
261
- """
255
+ """Storage that stores files in amazon s3"""
262
256
 
263
- dt_format = '%d.%m.%y %H:%M:%S.%f'
257
+ dt_format = "%d.%m.%y %H:%M:%S.%f"
264
258
 
265
259
  def __init__(self):
266
260
  super().__init__()
267
- if 's3_credentials' in self.config['permanent_storage']:
268
- self.s3 = boto3.client('s3', **self.config['permanent_storage']['s3_credentials'])
261
+ if "s3_credentials" in self.config["permanent_storage"]:
262
+ self.s3 = boto3.client("s3", **self.config["permanent_storage"]["s3_credentials"])
269
263
  else:
270
- self.s3 = boto3.client('s3')
271
- self.bucket = self.config['permanent_storage']['bucket']
264
+ self.s3 = boto3.client("s3")
265
+ self.bucket = self.config["permanent_storage"]["bucket"]
272
266
  self._thread_lock = threading.Lock()
273
267
 
274
268
  def _get_remote_last_modified(self, object_name: str) -> datetime:
275
- """ get time when object was created/modified
269
+ """get time when object was created/modified
276
270
 
277
- Args:
278
- object_name (str): name if file in bucket
271
+ Args:
272
+ object_name (str): name if file in bucket
279
273
 
280
- Returns:
281
- datetime
274
+ Returns:
275
+ datetime
282
276
  """
283
277
  last_modified = self.s3.get_object_attributes(
284
- Bucket=self.bucket,
285
- Key=object_name,
286
- ObjectAttributes=['Checksum']
287
- )['LastModified']
278
+ Bucket=self.bucket, Key=object_name, ObjectAttributes=["Checksum"]
279
+ )["LastModified"]
288
280
  last_modified = last_modified.replace(tzinfo=None)
289
281
  return last_modified
290
282
 
291
283
  @profiler.profile()
292
284
  def _get_local_last_modified(self, base_dir: str, local_name: str) -> datetime:
293
- """ get 'last_modified' that saved locally
285
+ """get 'last_modified' that saved locally
294
286
 
295
- Args:
296
- base_dir (str): path to base folder
297
- local_name (str): folder name
287
+ Args:
288
+ base_dir (str): path to base folder
289
+ local_name (str): folder name
298
290
 
299
- Returns:
300
- datetime | None
291
+ Returns:
292
+ datetime | None
301
293
  """
302
294
  last_modified_file_path = Path(base_dir) / local_name / DIR_LAST_MODIFIED_FILE_NAME
303
295
  if last_modified_file_path.is_file() is False:
@@ -311,35 +303,32 @@ class S3FSStore(BaseFSStore):
311
303
 
312
304
  @profiler.profile()
313
305
  def _save_local_last_modified(self, base_dir: str, local_name: str, last_modified: datetime):
314
- """ Save 'last_modified' to local folder
306
+ """Save 'last_modified' to local folder
315
307
 
316
- Args:
317
- base_dir (str): path to base folder
318
- local_name (str): folder name
319
- last_modified (datetime)
308
+ Args:
309
+ base_dir (str): path to base folder
310
+ local_name (str): folder name
311
+ last_modified (datetime)
320
312
  """
321
313
  last_modified_file_path = Path(base_dir) / local_name / DIR_LAST_MODIFIED_FILE_NAME
322
314
  last_modified_text = last_modified.strftime(self.dt_format)
323
315
  last_modified_file_path.write_text(last_modified_text)
324
316
 
325
317
  @profiler.profile()
326
- def _download(self, base_dir: str, remote_ziped_name: str,
327
- local_ziped_path: str, last_modified: datetime = None):
328
- """ download file to s3 and unarchive it
329
-
330
- Args:
331
- base_dir (str)
332
- remote_ziped_name (str)
333
- local_ziped_path (str)
334
- last_modified (datetime, optional)
318
+ def _download(self, base_dir: str, remote_ziped_name: str, local_ziped_path: str, last_modified: datetime = None):
319
+ """download file to s3 and unarchive it
320
+
321
+ Args:
322
+ base_dir (str)
323
+ remote_ziped_name (str)
324
+ local_ziped_path (str)
325
+ last_modified (datetime, optional)
335
326
  """
336
327
  os.makedirs(base_dir, exist_ok=True)
337
328
 
338
329
  remote_size = self.s3.get_object_attributes(
339
- Bucket=self.bucket,
340
- Key=remote_ziped_name,
341
- ObjectAttributes=['ObjectSize']
342
- )['ObjectSize']
330
+ Bucket=self.bucket, Key=remote_ziped_name, ObjectAttributes=["ObjectSize"]
331
+ )["ObjectSize"]
343
332
  if (remote_size * 2) > psutil.virtual_memory().available:
344
333
  fh = io.BytesIO()
345
334
  self.s3.download_fileobj(self.bucket, remote_ziped_name, fh)
@@ -354,52 +343,40 @@ class S3FSStore(BaseFSStore):
354
343
 
355
344
  if last_modified is None:
356
345
  last_modified = self._get_remote_last_modified(remote_ziped_name)
357
- self._save_local_last_modified(
358
- base_dir,
359
- remote_ziped_name.replace('.tar.gz', ''),
360
- last_modified
361
- )
346
+ self._save_local_last_modified(base_dir, remote_ziped_name.replace(".tar.gz", ""), last_modified)
362
347
 
363
348
  @profiler.profile()
364
349
  def get(self, local_name, base_dir):
365
350
  remote_name = local_name
366
- remote_ziped_name = f'{remote_name}.tar.gz'
367
- local_ziped_name = f'{local_name}.tar.gz'
351
+ remote_ziped_name = f"{remote_name}.tar.gz"
352
+ local_ziped_name = f"{local_name}.tar.gz"
368
353
  local_ziped_path = os.path.join(base_dir, local_ziped_name)
369
354
 
370
355
  folder_path = Path(base_dir) / local_name
371
- with FileLock(folder_path, mode='r'):
356
+ with FileLock(folder_path, mode="r"):
372
357
  local_last_modified = self._get_local_last_modified(base_dir, local_name)
373
358
  remote_last_modified = self._get_remote_last_modified(remote_ziped_name)
374
- if (
375
- local_last_modified is not None
376
- and local_last_modified == remote_last_modified
377
- ):
359
+ if local_last_modified is not None and local_last_modified == remote_last_modified:
378
360
  return
379
361
 
380
- with FileLock(folder_path, mode='w'):
381
- self._download(
382
- base_dir,
383
- remote_ziped_name,
384
- local_ziped_path,
385
- last_modified=remote_last_modified
386
- )
362
+ with FileLock(folder_path, mode="w"):
363
+ self._download(base_dir, remote_ziped_name, local_ziped_path, last_modified=remote_last_modified)
387
364
 
388
365
  @profiler.profile()
389
366
  def put(self, local_name, base_dir, compression_level=9):
390
367
  # NOTE: This `make_archive` function is implemente poorly and will create an empty archive file even if
391
368
  # the file/dir to be archived doesn't exist or for some other reason can't be archived
392
369
  remote_name = local_name
393
- remote_zipped_name = f'{remote_name}.tar.gz'
370
+ remote_zipped_name = f"{remote_name}.tar.gz"
394
371
 
395
372
  dir_path = Path(base_dir) / remote_name
396
- dir_size = sum(f.stat().st_size for f in dir_path.glob('**/*') if f.is_file())
373
+ dir_size = sum(f.stat().st_size for f in dir_path.glob("**/*") if f.is_file())
397
374
  if (dir_size * 2) < psutil.virtual_memory().available:
398
375
  old_cwd = os.getcwd()
399
376
  fh = io.BytesIO()
400
377
  with self._thread_lock:
401
378
  os.chdir(base_dir)
402
- with tarfile.open(fileobj=fh, mode='w:gz', compresslevel=compression_level) as tar:
379
+ with tarfile.open(fileobj=fh, mode="w:gz", compresslevel=compression_level) as tar:
403
380
  for path in dir_path.iterdir():
404
381
  if path.is_file() and path.name in SERVICE_FILES_NAMES:
405
382
  continue
@@ -407,24 +384,11 @@ class S3FSStore(BaseFSStore):
407
384
  os.chdir(old_cwd)
408
385
  fh.seek(0)
409
386
 
410
- self.s3.upload_fileobj(
411
- fh,
412
- self.bucket,
413
- remote_zipped_name
414
- )
387
+ self.s3.upload_fileobj(fh, self.bucket, remote_zipped_name)
415
388
  else:
416
- shutil.make_archive(
417
- os.path.join(base_dir, remote_name),
418
- 'gztar',
419
- root_dir=base_dir,
420
- base_dir=local_name
421
- )
422
-
423
- self.s3.upload_file(
424
- os.path.join(base_dir, remote_zipped_name),
425
- self.bucket,
426
- remote_zipped_name
427
- )
389
+ shutil.make_archive(os.path.join(base_dir, remote_name), "gztar", root_dir=base_dir, base_dir=local_name)
390
+
391
+ self.s3.upload_file(os.path.join(base_dir, remote_zipped_name), self.bucket, remote_zipped_name)
428
392
  os.remove(os.path.join(base_dir, remote_zipped_name))
429
393
 
430
394
  last_modified = self._get_remote_last_modified(remote_zipped_name)
@@ -436,25 +400,24 @@ class S3FSStore(BaseFSStore):
436
400
 
437
401
 
438
402
  def FsStore():
439
- storage_location = Config()['permanent_storage']['location']
440
- if storage_location == 'absent':
403
+ storage_location = Config()["permanent_storage"]["location"]
404
+ if storage_location == "absent":
441
405
  return AbsentFSStore()
442
- if storage_location == 'local':
406
+ if storage_location == "local":
443
407
  return LocalFSStore()
444
- if storage_location == 's3':
408
+ if storage_location == "s3":
445
409
  return S3FSStore()
446
410
  raise Exception(f"Location: '{storage_location}' not supported")
447
411
 
448
412
 
449
413
  class FileStorage:
450
- def __init__(self, resource_group: str, resource_id: int,
451
- root_dir: str = 'content', sync: bool = True):
414
+ def __init__(self, resource_group: str, resource_id: int, root_dir: str = "content", sync: bool = True):
452
415
  """
453
- Args:
454
- resource_group (str)
455
- resource_id (int)
456
- root_dir (str)
457
- sync (bool)
416
+ Args:
417
+ resource_group (str)
418
+ resource_id (int)
419
+ root_dir (str)
420
+ sync (bool)
458
421
  """
459
422
 
460
423
  self.resource_group = resource_group
@@ -462,11 +425,11 @@ class FileStorage:
462
425
  self.root_dir = root_dir
463
426
  self.sync = sync
464
427
 
465
- self.folder_name = f'{resource_group}_{ctx.company_id}_{resource_id}'
428
+ self.folder_name = f"{resource_group}_{ctx.company_id}_{resource_id}"
466
429
 
467
430
  config = Config()
468
431
  self.fs_store = FsStore()
469
- self.content_path = Path(config['paths'][root_dir])
432
+ self.content_path = Path(config["paths"][root_dir])
470
433
  self.resource_group_path = self.content_path / resource_group
471
434
  self.folder_path = self.resource_group_path / self.folder_name
472
435
  if self.folder_path.exists() is False:
@@ -474,16 +437,12 @@ class FileStorage:
474
437
 
475
438
  @profiler.profile()
476
439
  def push(self, compression_level: int = 9):
477
- with FileLock(self.folder_path, mode='r'):
440
+ with FileLock(self.folder_path, mode="r"):
478
441
  self._push_no_lock(compression_level=compression_level)
479
442
 
480
443
  @profiler.profile()
481
444
  def _push_no_lock(self, compression_level: int = 9):
482
- self.fs_store.put(
483
- str(self.folder_name),
484
- str(self.resource_group_path),
485
- compression_level=compression_level
486
- )
445
+ self.fs_store.put(str(self.folder_name), str(self.resource_group_path), compression_level=compression_level)
487
446
 
488
447
  @profiler.profile()
489
448
  def push_path(self, path, compression_level: int = 9):
@@ -493,10 +452,7 @@ class FileStorage:
493
452
  @profiler.profile()
494
453
  def pull(self):
495
454
  try:
496
- self.fs_store.get(
497
- str(self.folder_name),
498
- str(self.resource_group_path)
499
- )
455
+ self.fs_store.get(str(self.folder_name), str(self.resource_group_path))
500
456
  except (FileNotFoundError, S3ClientError):
501
457
  pass
502
458
 
@@ -510,11 +466,10 @@ class FileStorage:
510
466
  if self.sync is True:
511
467
  self.pull()
512
468
 
513
- with FileLock(self.folder_path, mode='w'):
514
-
469
+ with FileLock(self.folder_path, mode="w"):
515
470
  dest_abs_path = self.folder_path / name
516
471
 
517
- with open(dest_abs_path, 'wb') as fd:
472
+ with open(dest_abs_path, "wb") as fd:
518
473
  fd.write(content)
519
474
 
520
475
  if self.sync is True:
@@ -525,8 +480,8 @@ class FileStorage:
525
480
  if self.sync is True:
526
481
  self.pull()
527
482
  dest_abs_path = self.folder_path / name
528
- with FileLock(self.folder_path, mode='r'):
529
- with open(dest_abs_path, 'rb') as fd:
483
+ with FileLock(self.folder_path, mode="r"):
484
+ with open(dest_abs_path, "rb") as fd:
530
485
  return fd.read()
531
486
 
532
487
  @profiler.profile()
@@ -552,8 +507,7 @@ class FileStorage:
552
507
  """
553
508
  if self.sync is True:
554
509
  self.pull()
555
- with FileLock(self.folder_path, mode='w'):
556
-
510
+ with FileLock(self.folder_path, mode="w"):
557
511
  path = Path(path)
558
512
  if isinstance(dest_rel_path, str):
559
513
  dest_rel_path = Path(dest_rel_path)
@@ -563,17 +517,14 @@ class FileStorage:
563
517
  else:
564
518
  dest_abs_path = self.folder_path / dest_rel_path
565
519
 
566
- copy(
567
- str(path),
568
- str(dest_abs_path)
569
- )
520
+ copy(str(path), str(dest_abs_path))
570
521
 
571
522
  if self.sync is True:
572
523
  self._push_no_lock()
573
524
 
574
525
  @profiler.profile()
575
526
  def get_path(self, relative_path: Union[str, Path]) -> Path:
576
- """ Return path to file or folder
527
+ """Return path to file or folder
577
528
 
578
529
  Examples:
579
530
  get path to 'opts.json':
@@ -589,13 +540,13 @@ class FileStorage:
589
540
  if self.sync is True:
590
541
  self.pull()
591
542
 
592
- with FileLock(self.folder_path, mode='r'):
543
+ with FileLock(self.folder_path, mode="r"):
593
544
  if isinstance(relative_path, str):
594
545
  relative_path = Path(relative_path)
595
546
  # relative_path = relative_path.resolve()
596
547
 
597
548
  if relative_path.is_absolute():
598
- raise TypeError('FSStorage.get_path() got absolute path as argument')
549
+ raise TypeError("FSStorage.get_path() got absolute path as argument")
599
550
 
600
551
  ret_path = self.folder_path / relative_path
601
552
  if not ret_path.exists():
@@ -604,17 +555,17 @@ class FileStorage:
604
555
 
605
556
  return ret_path
606
557
 
607
- def delete(self, relative_path: Union[str, Path] = '.'):
558
+ def delete(self, relative_path: Union[str, Path] = "."):
608
559
  path = (self.folder_path / relative_path).resolve()
609
560
  if isinstance(relative_path, str):
610
561
  relative_path = Path(relative_path)
611
562
 
612
563
  if relative_path.is_absolute():
613
- raise TypeError('FSStorage.delete() got absolute path as argument')
564
+ raise TypeError("FSStorage.delete() got absolute path as argument")
614
565
 
615
566
  # complete removal
616
567
  if path == self.folder_path.resolve():
617
- with FileLock(self.folder_path, mode='w'):
568
+ with FileLock(self.folder_path, mode="w"):
618
569
  self.fs_store.delete(self.folder_name)
619
570
  # NOTE on some fs .rmtree is not working if any file is open
620
571
  shutil.rmtree(str(self.folder_path))
@@ -624,18 +575,16 @@ class FileStorage:
624
575
  try:
625
576
  shutil.rmtree(lock_folder_path)
626
577
  except FileNotFoundError:
627
- logger.warning('Tried to delete file not found: %s', lock_folder_path)
628
- except Exception as e:
629
- raise e
578
+ logger.warning("Tried to delete file not found: %s", lock_folder_path)
630
579
  # endregion
631
580
  return
632
581
 
633
582
  if self.sync is True:
634
583
  self.pull()
635
584
 
636
- with FileLock(self.folder_path, mode='w'):
585
+ with FileLock(self.folder_path, mode="w"):
637
586
  if path.exists() is False:
638
- raise Exception('Path does not exists')
587
+ raise Exception("Path does not exists")
639
588
 
640
589
  if path.is_file():
641
590
  path.unlink()
@@ -647,16 +596,12 @@ class FileStorage:
647
596
 
648
597
 
649
598
  class FileStorageFactory:
650
- def __init__(self, resource_group: str,
651
- root_dir: str = 'content', sync: bool = True):
599
+ def __init__(self, resource_group: str, root_dir: str = "content", sync: bool = True):
652
600
  self.resource_group = resource_group
653
601
  self.root_dir = root_dir
654
602
  self.sync = sync
655
603
 
656
604
  def __call__(self, resource_id: int):
657
605
  return FileStorage(
658
- resource_group=self.resource_group,
659
- root_dir=self.root_dir,
660
- sync=self.sync,
661
- resource_id=resource_id
606
+ resource_group=self.resource_group, root_dir=self.root_dir, sync=self.sync, resource_id=resource_id
662
607
  )