MindsDB 25.9.1.2__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +39 -20
- mindsdb/api/a2a/agent.py +7 -9
- mindsdb/api/a2a/common/server/server.py +3 -3
- mindsdb/api/a2a/common/server/task_manager.py +4 -4
- mindsdb/api/a2a/task_manager.py +15 -17
- mindsdb/api/common/middleware.py +9 -11
- mindsdb/api/executor/command_executor.py +2 -4
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +32 -16
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +8 -10
- mindsdb/api/http/namespaces/agents.py +10 -12
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +15 -4
- mindsdb/api/http/namespaces/handlers.py +7 -2
- mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +14 -8
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +11 -5
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +20 -2
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +18 -3
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -1
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
- mindsdb/interfaces/database/integrations.py +19 -2
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -5
- mindsdb/interfaces/jobs/scheduler.py +3 -8
- mindsdb/interfaces/knowledge_base/controller.py +54 -25
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +170 -166
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/storage/model_fs.py +54 -92
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -50
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +49 -0
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +8 -7
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +266 -261
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +119 -119
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/storage/fs.py
CHANGED
|
@@ -11,14 +11,14 @@ from dataclasses import dataclass
|
|
|
11
11
|
from datetime import datetime
|
|
12
12
|
import threading
|
|
13
13
|
|
|
14
|
-
if os.name ==
|
|
14
|
+
if os.name == "posix":
|
|
15
15
|
import fcntl
|
|
16
16
|
|
|
17
17
|
import psutil
|
|
18
18
|
|
|
19
19
|
from mindsdb.utilities.config import Config
|
|
20
20
|
|
|
21
|
-
if Config()[
|
|
21
|
+
if Config()["permanent_storage"]["location"] == "s3":
|
|
22
22
|
import boto3
|
|
23
23
|
from botocore.exceptions import ClientError as S3ClientError
|
|
24
24
|
else:
|
|
@@ -34,17 +34,17 @@ logger = log.getLogger(__name__)
|
|
|
34
34
|
|
|
35
35
|
@dataclass(frozen=True)
|
|
36
36
|
class RESOURCE_GROUP:
|
|
37
|
-
PREDICTOR =
|
|
38
|
-
INTEGRATION =
|
|
39
|
-
TAB =
|
|
40
|
-
SYSTEM =
|
|
37
|
+
PREDICTOR = "predictor"
|
|
38
|
+
INTEGRATION = "integration"
|
|
39
|
+
TAB = "tab"
|
|
40
|
+
SYSTEM = "system"
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
RESOURCE_GROUP = RESOURCE_GROUP()
|
|
44
44
|
|
|
45
45
|
|
|
46
|
-
DIR_LOCK_FILE_NAME =
|
|
47
|
-
DIR_LAST_MODIFIED_FILE_NAME =
|
|
46
|
+
DIR_LOCK_FILE_NAME = "dir.lock"
|
|
47
|
+
DIR_LAST_MODIFIED_FILE_NAME = "last_modified.txt"
|
|
48
48
|
SERVICE_FILES_NAMES = (DIR_LOCK_FILE_NAME, DIR_LAST_MODIFIED_FILE_NAME)
|
|
49
49
|
|
|
50
50
|
|
|
@@ -88,7 +88,7 @@ def copy(src, dst):
|
|
|
88
88
|
shutil.copytree(src, dst, dirs_exist_ok=True)
|
|
89
89
|
else:
|
|
90
90
|
if os.path.exists(dst):
|
|
91
|
-
if hashlib.md5(open(src,
|
|
91
|
+
if hashlib.md5(open(src, "rb").read()).hexdigest() == hashlib.md5(open(dst, "rb").read()).hexdigest():
|
|
92
92
|
return
|
|
93
93
|
try:
|
|
94
94
|
os.remove(dst)
|
|
@@ -98,12 +98,11 @@ def copy(src, dst):
|
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
class BaseFSStore(ABC):
|
|
101
|
-
"""Base class for file storage
|
|
102
|
-
"""
|
|
101
|
+
"""Base class for file storage"""
|
|
103
102
|
|
|
104
103
|
def __init__(self):
|
|
105
104
|
self.config = Config()
|
|
106
|
-
self.storage = self.config[
|
|
105
|
+
self.storage = self.config["paths"]["storage"]
|
|
107
106
|
|
|
108
107
|
@abstractmethod
|
|
109
108
|
def get(self, local_name, base_dir):
|
|
@@ -147,8 +146,8 @@ def get_dir_size(path: str):
|
|
|
147
146
|
|
|
148
147
|
|
|
149
148
|
class AbsentFSStore(BaseFSStore):
|
|
150
|
-
"""Storage class that does not store anything. It is just a dummy.
|
|
151
|
-
|
|
149
|
+
"""Storage class that does not store anything. It is just a dummy."""
|
|
150
|
+
|
|
152
151
|
def get(self, *args, **kwargs):
|
|
153
152
|
pass
|
|
154
153
|
|
|
@@ -160,8 +159,7 @@ class AbsentFSStore(BaseFSStore):
|
|
|
160
159
|
|
|
161
160
|
|
|
162
161
|
class LocalFSStore(BaseFSStore):
|
|
163
|
-
"""Storage that stores files locally
|
|
164
|
-
"""
|
|
162
|
+
"""Storage that stores files locally"""
|
|
165
163
|
|
|
166
164
|
def __init__(self):
|
|
167
165
|
super().__init__()
|
|
@@ -175,10 +173,7 @@ class LocalFSStore(BaseFSStore):
|
|
|
175
173
|
|
|
176
174
|
def put(self, local_name, base_dir, compression_level=9):
|
|
177
175
|
remote_name = local_name
|
|
178
|
-
copy(
|
|
179
|
-
os.path.join(base_dir, local_name),
|
|
180
|
-
os.path.join(self.storage, remote_name)
|
|
181
|
-
)
|
|
176
|
+
copy(os.path.join(base_dir, local_name), os.path.join(self.storage, remote_name))
|
|
182
177
|
|
|
183
178
|
def delete(self, remote_name):
|
|
184
179
|
path = Path(self.storage).joinpath(remote_name)
|
|
@@ -192,44 +187,44 @@ class LocalFSStore(BaseFSStore):
|
|
|
192
187
|
|
|
193
188
|
|
|
194
189
|
class FileLock:
|
|
195
|
-
"""
|
|
196
|
-
|
|
190
|
+
"""file lock to make safe concurrent access to directory
|
|
191
|
+
works as context
|
|
197
192
|
"""
|
|
198
193
|
|
|
199
194
|
@staticmethod
|
|
200
195
|
def lock_folder_path(relative_path: Path) -> Path:
|
|
201
|
-
"""
|
|
202
|
-
|
|
196
|
+
"""Args:
|
|
197
|
+
relative_path (Path): path to resource directory relative to storage root
|
|
203
198
|
|
|
204
|
-
|
|
205
|
-
|
|
199
|
+
Returns:
|
|
200
|
+
Path: abs path to folder with lock file
|
|
206
201
|
"""
|
|
207
202
|
config = Config()
|
|
208
|
-
root_storage_path = Path(config.paths[
|
|
209
|
-
return config.paths[
|
|
203
|
+
root_storage_path = Path(config.paths["root"])
|
|
204
|
+
return config.paths["locks"] / relative_path.relative_to(root_storage_path)
|
|
210
205
|
|
|
211
|
-
def __init__(self, relative_path: Path, mode: str =
|
|
212
|
-
"""
|
|
213
|
-
|
|
214
|
-
|
|
206
|
+
def __init__(self, relative_path: Path, mode: str = "w"):
|
|
207
|
+
"""Args:
|
|
208
|
+
relative_path (Path): path to resource directory relative to storage root
|
|
209
|
+
mode (str): lock for read (r) or write (w)
|
|
215
210
|
"""
|
|
216
|
-
if os.name !=
|
|
211
|
+
if os.name != "posix":
|
|
217
212
|
return
|
|
218
213
|
|
|
219
214
|
self._local_path = FileLock.lock_folder_path(relative_path)
|
|
220
215
|
self._lock_file_name = DIR_LOCK_FILE_NAME
|
|
221
216
|
self._lock_file_path = self._local_path / self._lock_file_name
|
|
222
|
-
self._mode = fcntl.LOCK_EX if mode ==
|
|
217
|
+
self._mode = fcntl.LOCK_EX if mode == "w" else fcntl.LOCK_SH
|
|
223
218
|
|
|
224
219
|
if self._lock_file_path.is_file() is False:
|
|
225
220
|
self._local_path.mkdir(parents=True, exist_ok=True)
|
|
226
221
|
try:
|
|
227
|
-
self._lock_file_path.write_text(
|
|
222
|
+
self._lock_file_path.write_text("")
|
|
228
223
|
except Exception:
|
|
229
224
|
pass
|
|
230
225
|
|
|
231
226
|
def __enter__(self):
|
|
232
|
-
if os.name !=
|
|
227
|
+
if os.name != "posix":
|
|
233
228
|
return
|
|
234
229
|
|
|
235
230
|
try:
|
|
@@ -239,14 +234,14 @@ class FileLock:
|
|
|
239
234
|
fcntl.lockf(self._lock_fd, self._mode | fcntl.LOCK_NB)
|
|
240
235
|
except (ValueError, FileNotFoundError):
|
|
241
236
|
# file probably was deleted between open and lock
|
|
242
|
-
logger.error(f
|
|
237
|
+
logger.error(f"Cant accure lock on {self._local_path}")
|
|
243
238
|
raise FileNotFoundError
|
|
244
239
|
except BlockingIOError:
|
|
245
|
-
logger.error(f
|
|
240
|
+
logger.error(f"Directory is locked by another process: {self._local_path}")
|
|
246
241
|
fcntl.lockf(self._lock_fd, self._mode)
|
|
247
242
|
|
|
248
243
|
def __exit__(self, exc_type, exc_value, traceback):
|
|
249
|
-
if os.name !=
|
|
244
|
+
if os.name != "posix":
|
|
250
245
|
return
|
|
251
246
|
|
|
252
247
|
try:
|
|
@@ -257,47 +252,44 @@ class FileLock:
|
|
|
257
252
|
|
|
258
253
|
|
|
259
254
|
class S3FSStore(BaseFSStore):
|
|
260
|
-
"""Storage that stores files in amazon s3
|
|
261
|
-
"""
|
|
255
|
+
"""Storage that stores files in amazon s3"""
|
|
262
256
|
|
|
263
|
-
dt_format =
|
|
257
|
+
dt_format = "%d.%m.%y %H:%M:%S.%f"
|
|
264
258
|
|
|
265
259
|
def __init__(self):
|
|
266
260
|
super().__init__()
|
|
267
|
-
if
|
|
268
|
-
self.s3 = boto3.client(
|
|
261
|
+
if "s3_credentials" in self.config["permanent_storage"]:
|
|
262
|
+
self.s3 = boto3.client("s3", **self.config["permanent_storage"]["s3_credentials"])
|
|
269
263
|
else:
|
|
270
|
-
self.s3 = boto3.client(
|
|
271
|
-
self.bucket = self.config[
|
|
264
|
+
self.s3 = boto3.client("s3")
|
|
265
|
+
self.bucket = self.config["permanent_storage"]["bucket"]
|
|
272
266
|
self._thread_lock = threading.Lock()
|
|
273
267
|
|
|
274
268
|
def _get_remote_last_modified(self, object_name: str) -> datetime:
|
|
275
|
-
"""
|
|
269
|
+
"""get time when object was created/modified
|
|
276
270
|
|
|
277
|
-
|
|
278
|
-
|
|
271
|
+
Args:
|
|
272
|
+
object_name (str): name if file in bucket
|
|
279
273
|
|
|
280
|
-
|
|
281
|
-
|
|
274
|
+
Returns:
|
|
275
|
+
datetime
|
|
282
276
|
"""
|
|
283
277
|
last_modified = self.s3.get_object_attributes(
|
|
284
|
-
Bucket=self.bucket,
|
|
285
|
-
|
|
286
|
-
ObjectAttributes=['Checksum']
|
|
287
|
-
)['LastModified']
|
|
278
|
+
Bucket=self.bucket, Key=object_name, ObjectAttributes=["Checksum"]
|
|
279
|
+
)["LastModified"]
|
|
288
280
|
last_modified = last_modified.replace(tzinfo=None)
|
|
289
281
|
return last_modified
|
|
290
282
|
|
|
291
283
|
@profiler.profile()
|
|
292
284
|
def _get_local_last_modified(self, base_dir: str, local_name: str) -> datetime:
|
|
293
|
-
"""
|
|
285
|
+
"""get 'last_modified' that saved locally
|
|
294
286
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
287
|
+
Args:
|
|
288
|
+
base_dir (str): path to base folder
|
|
289
|
+
local_name (str): folder name
|
|
298
290
|
|
|
299
|
-
|
|
300
|
-
|
|
291
|
+
Returns:
|
|
292
|
+
datetime | None
|
|
301
293
|
"""
|
|
302
294
|
last_modified_file_path = Path(base_dir) / local_name / DIR_LAST_MODIFIED_FILE_NAME
|
|
303
295
|
if last_modified_file_path.is_file() is False:
|
|
@@ -311,35 +303,32 @@ class S3FSStore(BaseFSStore):
|
|
|
311
303
|
|
|
312
304
|
@profiler.profile()
|
|
313
305
|
def _save_local_last_modified(self, base_dir: str, local_name: str, last_modified: datetime):
|
|
314
|
-
"""
|
|
306
|
+
"""Save 'last_modified' to local folder
|
|
315
307
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
308
|
+
Args:
|
|
309
|
+
base_dir (str): path to base folder
|
|
310
|
+
local_name (str): folder name
|
|
311
|
+
last_modified (datetime)
|
|
320
312
|
"""
|
|
321
313
|
last_modified_file_path = Path(base_dir) / local_name / DIR_LAST_MODIFIED_FILE_NAME
|
|
322
314
|
last_modified_text = last_modified.strftime(self.dt_format)
|
|
323
315
|
last_modified_file_path.write_text(last_modified_text)
|
|
324
316
|
|
|
325
317
|
@profiler.profile()
|
|
326
|
-
def _download(self, base_dir: str, remote_ziped_name: str,
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
last_modified (datetime, optional)
|
|
318
|
+
def _download(self, base_dir: str, remote_ziped_name: str, local_ziped_path: str, last_modified: datetime = None):
|
|
319
|
+
"""download file to s3 and unarchive it
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
base_dir (str)
|
|
323
|
+
remote_ziped_name (str)
|
|
324
|
+
local_ziped_path (str)
|
|
325
|
+
last_modified (datetime, optional)
|
|
335
326
|
"""
|
|
336
327
|
os.makedirs(base_dir, exist_ok=True)
|
|
337
328
|
|
|
338
329
|
remote_size = self.s3.get_object_attributes(
|
|
339
|
-
Bucket=self.bucket,
|
|
340
|
-
|
|
341
|
-
ObjectAttributes=['ObjectSize']
|
|
342
|
-
)['ObjectSize']
|
|
330
|
+
Bucket=self.bucket, Key=remote_ziped_name, ObjectAttributes=["ObjectSize"]
|
|
331
|
+
)["ObjectSize"]
|
|
343
332
|
if (remote_size * 2) > psutil.virtual_memory().available:
|
|
344
333
|
fh = io.BytesIO()
|
|
345
334
|
self.s3.download_fileobj(self.bucket, remote_ziped_name, fh)
|
|
@@ -354,52 +343,40 @@ class S3FSStore(BaseFSStore):
|
|
|
354
343
|
|
|
355
344
|
if last_modified is None:
|
|
356
345
|
last_modified = self._get_remote_last_modified(remote_ziped_name)
|
|
357
|
-
self._save_local_last_modified(
|
|
358
|
-
base_dir,
|
|
359
|
-
remote_ziped_name.replace('.tar.gz', ''),
|
|
360
|
-
last_modified
|
|
361
|
-
)
|
|
346
|
+
self._save_local_last_modified(base_dir, remote_ziped_name.replace(".tar.gz", ""), last_modified)
|
|
362
347
|
|
|
363
348
|
@profiler.profile()
|
|
364
349
|
def get(self, local_name, base_dir):
|
|
365
350
|
remote_name = local_name
|
|
366
|
-
remote_ziped_name = f
|
|
367
|
-
local_ziped_name = f
|
|
351
|
+
remote_ziped_name = f"{remote_name}.tar.gz"
|
|
352
|
+
local_ziped_name = f"{local_name}.tar.gz"
|
|
368
353
|
local_ziped_path = os.path.join(base_dir, local_ziped_name)
|
|
369
354
|
|
|
370
355
|
folder_path = Path(base_dir) / local_name
|
|
371
|
-
with FileLock(folder_path, mode=
|
|
356
|
+
with FileLock(folder_path, mode="r"):
|
|
372
357
|
local_last_modified = self._get_local_last_modified(base_dir, local_name)
|
|
373
358
|
remote_last_modified = self._get_remote_last_modified(remote_ziped_name)
|
|
374
|
-
if
|
|
375
|
-
local_last_modified is not None
|
|
376
|
-
and local_last_modified == remote_last_modified
|
|
377
|
-
):
|
|
359
|
+
if local_last_modified is not None and local_last_modified == remote_last_modified:
|
|
378
360
|
return
|
|
379
361
|
|
|
380
|
-
with FileLock(folder_path, mode=
|
|
381
|
-
self._download(
|
|
382
|
-
base_dir,
|
|
383
|
-
remote_ziped_name,
|
|
384
|
-
local_ziped_path,
|
|
385
|
-
last_modified=remote_last_modified
|
|
386
|
-
)
|
|
362
|
+
with FileLock(folder_path, mode="w"):
|
|
363
|
+
self._download(base_dir, remote_ziped_name, local_ziped_path, last_modified=remote_last_modified)
|
|
387
364
|
|
|
388
365
|
@profiler.profile()
|
|
389
366
|
def put(self, local_name, base_dir, compression_level=9):
|
|
390
367
|
# NOTE: This `make_archive` function is implemente poorly and will create an empty archive file even if
|
|
391
368
|
# the file/dir to be archived doesn't exist or for some other reason can't be archived
|
|
392
369
|
remote_name = local_name
|
|
393
|
-
remote_zipped_name = f
|
|
370
|
+
remote_zipped_name = f"{remote_name}.tar.gz"
|
|
394
371
|
|
|
395
372
|
dir_path = Path(base_dir) / remote_name
|
|
396
|
-
dir_size = sum(f.stat().st_size for f in dir_path.glob(
|
|
373
|
+
dir_size = sum(f.stat().st_size for f in dir_path.glob("**/*") if f.is_file())
|
|
397
374
|
if (dir_size * 2) < psutil.virtual_memory().available:
|
|
398
375
|
old_cwd = os.getcwd()
|
|
399
376
|
fh = io.BytesIO()
|
|
400
377
|
with self._thread_lock:
|
|
401
378
|
os.chdir(base_dir)
|
|
402
|
-
with tarfile.open(fileobj=fh, mode=
|
|
379
|
+
with tarfile.open(fileobj=fh, mode="w:gz", compresslevel=compression_level) as tar:
|
|
403
380
|
for path in dir_path.iterdir():
|
|
404
381
|
if path.is_file() and path.name in SERVICE_FILES_NAMES:
|
|
405
382
|
continue
|
|
@@ -407,24 +384,11 @@ class S3FSStore(BaseFSStore):
|
|
|
407
384
|
os.chdir(old_cwd)
|
|
408
385
|
fh.seek(0)
|
|
409
386
|
|
|
410
|
-
self.s3.upload_fileobj(
|
|
411
|
-
fh,
|
|
412
|
-
self.bucket,
|
|
413
|
-
remote_zipped_name
|
|
414
|
-
)
|
|
387
|
+
self.s3.upload_fileobj(fh, self.bucket, remote_zipped_name)
|
|
415
388
|
else:
|
|
416
|
-
shutil.make_archive(
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
root_dir=base_dir,
|
|
420
|
-
base_dir=local_name
|
|
421
|
-
)
|
|
422
|
-
|
|
423
|
-
self.s3.upload_file(
|
|
424
|
-
os.path.join(base_dir, remote_zipped_name),
|
|
425
|
-
self.bucket,
|
|
426
|
-
remote_zipped_name
|
|
427
|
-
)
|
|
389
|
+
shutil.make_archive(os.path.join(base_dir, remote_name), "gztar", root_dir=base_dir, base_dir=local_name)
|
|
390
|
+
|
|
391
|
+
self.s3.upload_file(os.path.join(base_dir, remote_zipped_name), self.bucket, remote_zipped_name)
|
|
428
392
|
os.remove(os.path.join(base_dir, remote_zipped_name))
|
|
429
393
|
|
|
430
394
|
last_modified = self._get_remote_last_modified(remote_zipped_name)
|
|
@@ -436,25 +400,24 @@ class S3FSStore(BaseFSStore):
|
|
|
436
400
|
|
|
437
401
|
|
|
438
402
|
def FsStore():
|
|
439
|
-
storage_location = Config()[
|
|
440
|
-
if storage_location ==
|
|
403
|
+
storage_location = Config()["permanent_storage"]["location"]
|
|
404
|
+
if storage_location == "absent":
|
|
441
405
|
return AbsentFSStore()
|
|
442
|
-
if storage_location ==
|
|
406
|
+
if storage_location == "local":
|
|
443
407
|
return LocalFSStore()
|
|
444
|
-
if storage_location ==
|
|
408
|
+
if storage_location == "s3":
|
|
445
409
|
return S3FSStore()
|
|
446
410
|
raise Exception(f"Location: '{storage_location}' not supported")
|
|
447
411
|
|
|
448
412
|
|
|
449
413
|
class FileStorage:
|
|
450
|
-
def __init__(self, resource_group: str, resource_id: int,
|
|
451
|
-
root_dir: str = 'content', sync: bool = True):
|
|
414
|
+
def __init__(self, resource_group: str, resource_id: int, root_dir: str = "content", sync: bool = True):
|
|
452
415
|
"""
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
416
|
+
Args:
|
|
417
|
+
resource_group (str)
|
|
418
|
+
resource_id (int)
|
|
419
|
+
root_dir (str)
|
|
420
|
+
sync (bool)
|
|
458
421
|
"""
|
|
459
422
|
|
|
460
423
|
self.resource_group = resource_group
|
|
@@ -462,11 +425,11 @@ class FileStorage:
|
|
|
462
425
|
self.root_dir = root_dir
|
|
463
426
|
self.sync = sync
|
|
464
427
|
|
|
465
|
-
self.folder_name = f
|
|
428
|
+
self.folder_name = f"{resource_group}_{ctx.company_id}_{resource_id}"
|
|
466
429
|
|
|
467
430
|
config = Config()
|
|
468
431
|
self.fs_store = FsStore()
|
|
469
|
-
self.content_path = Path(config[
|
|
432
|
+
self.content_path = Path(config["paths"][root_dir])
|
|
470
433
|
self.resource_group_path = self.content_path / resource_group
|
|
471
434
|
self.folder_path = self.resource_group_path / self.folder_name
|
|
472
435
|
if self.folder_path.exists() is False:
|
|
@@ -474,16 +437,12 @@ class FileStorage:
|
|
|
474
437
|
|
|
475
438
|
@profiler.profile()
|
|
476
439
|
def push(self, compression_level: int = 9):
|
|
477
|
-
with FileLock(self.folder_path, mode=
|
|
440
|
+
with FileLock(self.folder_path, mode="r"):
|
|
478
441
|
self._push_no_lock(compression_level=compression_level)
|
|
479
442
|
|
|
480
443
|
@profiler.profile()
|
|
481
444
|
def _push_no_lock(self, compression_level: int = 9):
|
|
482
|
-
self.fs_store.put(
|
|
483
|
-
str(self.folder_name),
|
|
484
|
-
str(self.resource_group_path),
|
|
485
|
-
compression_level=compression_level
|
|
486
|
-
)
|
|
445
|
+
self.fs_store.put(str(self.folder_name), str(self.resource_group_path), compression_level=compression_level)
|
|
487
446
|
|
|
488
447
|
@profiler.profile()
|
|
489
448
|
def push_path(self, path, compression_level: int = 9):
|
|
@@ -493,10 +452,7 @@ class FileStorage:
|
|
|
493
452
|
@profiler.profile()
|
|
494
453
|
def pull(self):
|
|
495
454
|
try:
|
|
496
|
-
self.fs_store.get(
|
|
497
|
-
str(self.folder_name),
|
|
498
|
-
str(self.resource_group_path)
|
|
499
|
-
)
|
|
455
|
+
self.fs_store.get(str(self.folder_name), str(self.resource_group_path))
|
|
500
456
|
except (FileNotFoundError, S3ClientError):
|
|
501
457
|
pass
|
|
502
458
|
|
|
@@ -510,11 +466,10 @@ class FileStorage:
|
|
|
510
466
|
if self.sync is True:
|
|
511
467
|
self.pull()
|
|
512
468
|
|
|
513
|
-
with FileLock(self.folder_path, mode=
|
|
514
|
-
|
|
469
|
+
with FileLock(self.folder_path, mode="w"):
|
|
515
470
|
dest_abs_path = self.folder_path / name
|
|
516
471
|
|
|
517
|
-
with open(dest_abs_path,
|
|
472
|
+
with open(dest_abs_path, "wb") as fd:
|
|
518
473
|
fd.write(content)
|
|
519
474
|
|
|
520
475
|
if self.sync is True:
|
|
@@ -525,8 +480,8 @@ class FileStorage:
|
|
|
525
480
|
if self.sync is True:
|
|
526
481
|
self.pull()
|
|
527
482
|
dest_abs_path = self.folder_path / name
|
|
528
|
-
with FileLock(self.folder_path, mode=
|
|
529
|
-
with open(dest_abs_path,
|
|
483
|
+
with FileLock(self.folder_path, mode="r"):
|
|
484
|
+
with open(dest_abs_path, "rb") as fd:
|
|
530
485
|
return fd.read()
|
|
531
486
|
|
|
532
487
|
@profiler.profile()
|
|
@@ -552,8 +507,7 @@ class FileStorage:
|
|
|
552
507
|
"""
|
|
553
508
|
if self.sync is True:
|
|
554
509
|
self.pull()
|
|
555
|
-
with FileLock(self.folder_path, mode=
|
|
556
|
-
|
|
510
|
+
with FileLock(self.folder_path, mode="w"):
|
|
557
511
|
path = Path(path)
|
|
558
512
|
if isinstance(dest_rel_path, str):
|
|
559
513
|
dest_rel_path = Path(dest_rel_path)
|
|
@@ -563,17 +517,14 @@ class FileStorage:
|
|
|
563
517
|
else:
|
|
564
518
|
dest_abs_path = self.folder_path / dest_rel_path
|
|
565
519
|
|
|
566
|
-
copy(
|
|
567
|
-
str(path),
|
|
568
|
-
str(dest_abs_path)
|
|
569
|
-
)
|
|
520
|
+
copy(str(path), str(dest_abs_path))
|
|
570
521
|
|
|
571
522
|
if self.sync is True:
|
|
572
523
|
self._push_no_lock()
|
|
573
524
|
|
|
574
525
|
@profiler.profile()
|
|
575
526
|
def get_path(self, relative_path: Union[str, Path]) -> Path:
|
|
576
|
-
"""
|
|
527
|
+
"""Return path to file or folder
|
|
577
528
|
|
|
578
529
|
Examples:
|
|
579
530
|
get path to 'opts.json':
|
|
@@ -589,13 +540,13 @@ class FileStorage:
|
|
|
589
540
|
if self.sync is True:
|
|
590
541
|
self.pull()
|
|
591
542
|
|
|
592
|
-
with FileLock(self.folder_path, mode=
|
|
543
|
+
with FileLock(self.folder_path, mode="r"):
|
|
593
544
|
if isinstance(relative_path, str):
|
|
594
545
|
relative_path = Path(relative_path)
|
|
595
546
|
# relative_path = relative_path.resolve()
|
|
596
547
|
|
|
597
548
|
if relative_path.is_absolute():
|
|
598
|
-
raise TypeError(
|
|
549
|
+
raise TypeError("FSStorage.get_path() got absolute path as argument")
|
|
599
550
|
|
|
600
551
|
ret_path = self.folder_path / relative_path
|
|
601
552
|
if not ret_path.exists():
|
|
@@ -604,17 +555,17 @@ class FileStorage:
|
|
|
604
555
|
|
|
605
556
|
return ret_path
|
|
606
557
|
|
|
607
|
-
def delete(self, relative_path: Union[str, Path] =
|
|
558
|
+
def delete(self, relative_path: Union[str, Path] = "."):
|
|
608
559
|
path = (self.folder_path / relative_path).resolve()
|
|
609
560
|
if isinstance(relative_path, str):
|
|
610
561
|
relative_path = Path(relative_path)
|
|
611
562
|
|
|
612
563
|
if relative_path.is_absolute():
|
|
613
|
-
raise TypeError(
|
|
564
|
+
raise TypeError("FSStorage.delete() got absolute path as argument")
|
|
614
565
|
|
|
615
566
|
# complete removal
|
|
616
567
|
if path == self.folder_path.resolve():
|
|
617
|
-
with FileLock(self.folder_path, mode=
|
|
568
|
+
with FileLock(self.folder_path, mode="w"):
|
|
618
569
|
self.fs_store.delete(self.folder_name)
|
|
619
570
|
# NOTE on some fs .rmtree is not working if any file is open
|
|
620
571
|
shutil.rmtree(str(self.folder_path))
|
|
@@ -624,18 +575,16 @@ class FileStorage:
|
|
|
624
575
|
try:
|
|
625
576
|
shutil.rmtree(lock_folder_path)
|
|
626
577
|
except FileNotFoundError:
|
|
627
|
-
logger.warning(
|
|
628
|
-
except Exception as e:
|
|
629
|
-
raise e
|
|
578
|
+
logger.warning("Tried to delete file not found: %s", lock_folder_path)
|
|
630
579
|
# endregion
|
|
631
580
|
return
|
|
632
581
|
|
|
633
582
|
if self.sync is True:
|
|
634
583
|
self.pull()
|
|
635
584
|
|
|
636
|
-
with FileLock(self.folder_path, mode=
|
|
585
|
+
with FileLock(self.folder_path, mode="w"):
|
|
637
586
|
if path.exists() is False:
|
|
638
|
-
raise Exception(
|
|
587
|
+
raise Exception("Path does not exists")
|
|
639
588
|
|
|
640
589
|
if path.is_file():
|
|
641
590
|
path.unlink()
|
|
@@ -647,16 +596,12 @@ class FileStorage:
|
|
|
647
596
|
|
|
648
597
|
|
|
649
598
|
class FileStorageFactory:
|
|
650
|
-
def __init__(self, resource_group: str,
|
|
651
|
-
root_dir: str = 'content', sync: bool = True):
|
|
599
|
+
def __init__(self, resource_group: str, root_dir: str = "content", sync: bool = True):
|
|
652
600
|
self.resource_group = resource_group
|
|
653
601
|
self.root_dir = root_dir
|
|
654
602
|
self.sync = sync
|
|
655
603
|
|
|
656
604
|
def __call__(self, resource_id: int):
|
|
657
605
|
return FileStorage(
|
|
658
|
-
resource_group=self.resource_group,
|
|
659
|
-
root_dir=self.root_dir,
|
|
660
|
-
sync=self.sync,
|
|
661
|
-
resource_id=resource_id
|
|
606
|
+
resource_group=self.resource_group, root_dir=self.root_dir, sync=self.sync, resource_id=resource_id
|
|
662
607
|
)
|