mercuto-client 0.3.0__py3-none-any.whl → 0.3.4a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mercuto_client/_tests/test_ingester/test_file_processor.py +171 -13
- mercuto_client/acl.py +20 -0
- mercuto_client/client.py +41 -8
- mercuto_client/ingester/__main__.py +11 -8
- mercuto_client/ingester/processor.py +57 -37
- mercuto_client/mocks/__init__.py +76 -2
- mercuto_client/mocks/_utility.py +6 -0
- mercuto_client/mocks/mock_core.py +44 -0
- mercuto_client/mocks/mock_media.py +117 -0
- mercuto_client/mocks/mock_notifications.py +65 -0
- mercuto_client/modules/__init__.py +8 -4
- mercuto_client/modules/core.py +117 -287
- mercuto_client/modules/data.py +39 -41
- mercuto_client/modules/fatigue.py +19 -19
- mercuto_client/modules/identity.py +31 -31
- mercuto_client/modules/media.py +324 -0
- mercuto_client/modules/notifications.py +88 -0
- mercuto_client/modules/reports.py +222 -0
- mercuto_client/util.py +1 -1
- {mercuto_client-0.3.0.dist-info → mercuto_client-0.3.4a3.dist-info}/METADATA +1 -1
- {mercuto_client-0.3.0.dist-info → mercuto_client-0.3.4a3.dist-info}/RECORD +24 -18
- {mercuto_client-0.3.0.dist-info → mercuto_client-0.3.4a3.dist-info}/WHEEL +0 -0
- {mercuto_client-0.3.0.dist-info → mercuto_client-0.3.4a3.dist-info}/licenses/LICENSE +0 -0
- {mercuto_client-0.3.0.dist-info → mercuto_client-0.3.4a3.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@ import os
|
|
|
2
2
|
import sqlite3
|
|
3
3
|
import tempfile
|
|
4
4
|
import time
|
|
5
|
-
from typing import Generator, Tuple
|
|
5
|
+
from typing import Generator, Iterator, Tuple
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
8
8
|
|
|
@@ -14,30 +14,40 @@ def mock_process_callback(filepath: str) -> bool:
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@pytest.fixture
|
|
17
|
-
def
|
|
17
|
+
def buffer_directory() -> Iterator[str]:
|
|
18
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
19
|
+
yield temp_dir
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
|
|
23
|
+
def work_directory() -> Iterator[str]:
|
|
24
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
25
|
+
yield temp_dir
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.fixture
|
|
29
|
+
def database_path(work_directory: str) -> Iterator[str]:
|
|
30
|
+
yield os.path.join(work_directory, "test_buffer.db")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.fixture
|
|
34
|
+
def temp_env(buffer_directory: str, database_path: str) -> Generator[Tuple[FileProcessor, str, str], None, None]:
|
|
18
35
|
"""Setup temporary directory and database"""
|
|
19
|
-
buffer_dir: str = tempfile.mkdtemp()
|
|
20
|
-
workdir = tempfile.mkdtemp()
|
|
21
|
-
db_path: str = os.path.join(workdir, "test_buffer.db")
|
|
22
36
|
|
|
23
37
|
processor: FileProcessor = FileProcessor(
|
|
24
|
-
buffer_dir=
|
|
25
|
-
db_path=
|
|
38
|
+
buffer_dir=buffer_directory,
|
|
39
|
+
db_path=database_path,
|
|
26
40
|
max_files=3,
|
|
27
41
|
max_attempts=2,
|
|
28
42
|
process_callback=mock_process_callback
|
|
29
43
|
)
|
|
30
44
|
|
|
31
|
-
yield processor,
|
|
32
|
-
|
|
33
|
-
# Cleanup
|
|
34
|
-
if os.path.exists(db_path):
|
|
35
|
-
os.remove(db_path)
|
|
45
|
+
yield processor, buffer_directory, database_path
|
|
36
46
|
|
|
37
47
|
|
|
38
48
|
def test_init_db(temp_env: Tuple[FileProcessor, str, str]) -> None:
|
|
39
49
|
"""Verify database initialization"""
|
|
40
|
-
|
|
50
|
+
_, _, db_path = temp_env
|
|
41
51
|
conn: sqlite3.Connection = sqlite3.connect(db_path)
|
|
42
52
|
cursor: sqlite3.Cursor = conn.cursor()
|
|
43
53
|
cursor.execute(
|
|
@@ -208,3 +218,151 @@ def test_scan_existing_files_that_havnt_been_processed(temp_env: Tuple[FileProce
|
|
|
208
218
|
files: list[tuple[str]] = cursor.fetchall()
|
|
209
219
|
conn.close()
|
|
210
220
|
assert files == [('file0.txt',), ('file1.txt',), ('file2.txt',), ('file3.txt',), ('file4.txt',)]
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def test_ensure_free_space(database_path: str, buffer_directory: str) -> None:
|
|
224
|
+
"""Ensure that keeping X MB free space works correctly"""
|
|
225
|
+
remaining_free_space_mb = 501
|
|
226
|
+
|
|
227
|
+
def mock_free_space_checker(dir: str) -> float:
|
|
228
|
+
"""
|
|
229
|
+
Mock free space checker that returns a controlled value.
|
|
230
|
+
Returns remaining_free_space_mb - number of files in the buffer directory.
|
|
231
|
+
"""
|
|
232
|
+
n_files = len(os.listdir(buffer_directory))
|
|
233
|
+
return remaining_free_space_mb - n_files
|
|
234
|
+
|
|
235
|
+
processor = FileProcessor(
|
|
236
|
+
buffer_dir=buffer_directory,
|
|
237
|
+
db_path=database_path,
|
|
238
|
+
max_files=10,
|
|
239
|
+
max_attempts=2,
|
|
240
|
+
process_callback=mock_process_callback,
|
|
241
|
+
target_free_space_mb=500,
|
|
242
|
+
free_space_checker=mock_free_space_checker
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
def create_and_add_file(name: str) -> str:
|
|
246
|
+
file_path = os.path.join(buffer_directory, name)
|
|
247
|
+
with open(file_path, 'w') as f:
|
|
248
|
+
f.write("Test content")
|
|
249
|
+
processor.add_file_to_db(file_path)
|
|
250
|
+
return file_path
|
|
251
|
+
|
|
252
|
+
create_and_add_file('success_file_1.txt')
|
|
253
|
+
create_and_add_file('success_file_2.txt')
|
|
254
|
+
create_and_add_file('success_file_3.txt')
|
|
255
|
+
create_and_add_file('success_file_4.txt')
|
|
256
|
+
|
|
257
|
+
# There should be 4 files on disk
|
|
258
|
+
assert len(os.listdir(buffer_directory)) == 4
|
|
259
|
+
|
|
260
|
+
# Trigger cleanup, should not delete anything as we have enough free space
|
|
261
|
+
remaining_free_space_mb = 504
|
|
262
|
+
processor.cleanup_old_files()
|
|
263
|
+
assert len(os.listdir(buffer_directory)) == 4
|
|
264
|
+
|
|
265
|
+
# Reduce free space to trigger cleanup
|
|
266
|
+
remaining_free_space_mb = 502
|
|
267
|
+
processor.cleanup_old_files()
|
|
268
|
+
# This should have deleted the oldest two files to maintain 500 MB free space
|
|
269
|
+
# E.g. no deletes gives 498 MB free space, deleting one file gives 499 MB free space,
|
|
270
|
+
# deleting two files gives 500 MB free space
|
|
271
|
+
assert len(os.listdir(buffer_directory)) == 2
|
|
272
|
+
remaining_files = os.listdir(buffer_directory)
|
|
273
|
+
assert set(remaining_files) == {'success_file_3.txt', 'success_file_4.txt'}
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def test_ensure_free_space_with_externally_modified_files(database_path: str, buffer_directory: str) -> None:
|
|
277
|
+
"""
|
|
278
|
+
Ensure that keeping X MB free space works correctly under the following:
|
|
279
|
+
- Files may be deleted from disk externally (not via the processor)
|
|
280
|
+
"""
|
|
281
|
+
remaining_free_space_mb = 501
|
|
282
|
+
|
|
283
|
+
def mock_free_space_checker(dir: str) -> float:
|
|
284
|
+
"""
|
|
285
|
+
Mock free space checker that returns a controlled value.
|
|
286
|
+
Returns remaining_free_space_mb - number of files in the buffer directory.
|
|
287
|
+
"""
|
|
288
|
+
n_files = len(os.listdir(buffer_directory))
|
|
289
|
+
return remaining_free_space_mb - n_files
|
|
290
|
+
|
|
291
|
+
processor = FileProcessor(
|
|
292
|
+
buffer_dir=buffer_directory,
|
|
293
|
+
db_path=database_path,
|
|
294
|
+
max_files=10,
|
|
295
|
+
max_attempts=2,
|
|
296
|
+
process_callback=mock_process_callback,
|
|
297
|
+
target_free_space_mb=500,
|
|
298
|
+
free_space_checker=mock_free_space_checker
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
def create_and_add_file(name: str) -> str:
|
|
302
|
+
file_path = os.path.join(buffer_directory, name)
|
|
303
|
+
with open(file_path, 'w') as f:
|
|
304
|
+
f.write("Test content")
|
|
305
|
+
processor.add_file_to_db(file_path)
|
|
306
|
+
return file_path
|
|
307
|
+
|
|
308
|
+
file1 = create_and_add_file('success_file_1.txt')
|
|
309
|
+
file2 = create_and_add_file('success_file_2.txt')
|
|
310
|
+
create_and_add_file('success_file_3.txt')
|
|
311
|
+
create_and_add_file('success_file_4.txt')
|
|
312
|
+
|
|
313
|
+
# There should be 4 files on disk
|
|
314
|
+
assert len(os.listdir(buffer_directory)) == 4
|
|
315
|
+
|
|
316
|
+
# Manually delete files 1 and 2 to simulate external deletion
|
|
317
|
+
os.remove(file1)
|
|
318
|
+
os.remove(file2)
|
|
319
|
+
|
|
320
|
+
# Reduce free space to trigger cleanup
|
|
321
|
+
remaining_free_space_mb = 100
|
|
322
|
+
processor.cleanup_old_files()
|
|
323
|
+
# This should still delete all files (even though 1 and 2 are already gone) to maintain 500 MB free space
|
|
324
|
+
# The 500 MB target can never be reached from only 100 MB of free space, so all files should be deleted.
|
|
325
|
+
assert len(os.listdir(buffer_directory)) == 0
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def test_cleanup_max_files_with_externally_modified_files(database_path: str, buffer_directory: str) -> None:
|
|
329
|
+
"""
|
|
330
|
+
Ensure that keeping X max files works correctly under the following:
|
|
331
|
+
- Files may be deleted from disk externally (not via the processor)
|
|
332
|
+
"""
|
|
333
|
+
|
|
334
|
+
processor = FileProcessor(
|
|
335
|
+
buffer_dir=buffer_directory,
|
|
336
|
+
db_path=database_path,
|
|
337
|
+
max_files=1,
|
|
338
|
+
max_attempts=2,
|
|
339
|
+
process_callback=mock_process_callback,
|
|
340
|
+
target_free_space_mb=None,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
def create_and_add_file(name: str) -> str:
|
|
344
|
+
file_path = os.path.join(buffer_directory, name)
|
|
345
|
+
with open(file_path, 'w') as f:
|
|
346
|
+
f.write("Test content")
|
|
347
|
+
processor.add_file_to_db(file_path)
|
|
348
|
+
return file_path
|
|
349
|
+
|
|
350
|
+
file1 = create_and_add_file('success_file_1.txt')
|
|
351
|
+
file2 = create_and_add_file('success_file_2.txt')
|
|
352
|
+
create_and_add_file('success_file_3.txt')
|
|
353
|
+
create_and_add_file('success_file_4.txt')
|
|
354
|
+
|
|
355
|
+
# There should be 4 files on disk
|
|
356
|
+
assert len(os.listdir(buffer_directory)) == 4
|
|
357
|
+
|
|
358
|
+
# Manually delete files 1 and 2 to simulate external deletion
|
|
359
|
+
os.remove(file1)
|
|
360
|
+
os.remove(file2)
|
|
361
|
+
|
|
362
|
+
# Trigger cleanup
|
|
363
|
+
processor.cleanup_old_files()
|
|
364
|
+
# This should have deleted all but one file to maintain max_files=1
|
|
365
|
+
assert len(os.listdir(buffer_directory)) == 1
|
|
366
|
+
# It should be the last file added
|
|
367
|
+
remaining_files = os.listdir(buffer_directory)
|
|
368
|
+
assert set(remaining_files) == {'success_file_4.txt'}
|
mercuto_client/acl.py
CHANGED
|
@@ -20,6 +20,13 @@ class ResourceTypes:
|
|
|
20
20
|
TENANT = 'tenant'
|
|
21
21
|
WILDCARD = '*'
|
|
22
22
|
|
|
23
|
+
class Notifications:
|
|
24
|
+
"""
|
|
25
|
+
Resource types available for ServiceTypes.NOTIFICATIONS
|
|
26
|
+
"""
|
|
27
|
+
CONTACT_GROUP = 'contact-group'
|
|
28
|
+
WILDCARD = '*'
|
|
29
|
+
|
|
23
30
|
|
|
24
31
|
class AllowedActions:
|
|
25
32
|
WILDCARD = '*'
|
|
@@ -46,6 +53,13 @@ class AllowedActions:
|
|
|
46
53
|
VIEW_USER_DETAILED_INFO = 'IDENTITY:VIEW_USER_DETAILED_INFO'
|
|
47
54
|
CREATE_NEW_TENANTS = 'IDENTITY:CREATE_NEW_TENANTS'
|
|
48
55
|
|
|
56
|
+
class Notifications:
|
|
57
|
+
"""
|
|
58
|
+
Actions available for ServiceTypes.NOTIFICATIONS
|
|
59
|
+
"""
|
|
60
|
+
WILDCARD = '*'
|
|
61
|
+
ISSUE_NOTIFICATION = 'NOTIFICATIONS:ISSUE_NOTIFICATION'
|
|
62
|
+
|
|
49
63
|
|
|
50
64
|
class ServiceTypes:
|
|
51
65
|
class Mercuto:
|
|
@@ -58,8 +72,14 @@ class ServiceTypes:
|
|
|
58
72
|
ResourceTypes = ResourceTypes.Identity
|
|
59
73
|
AllowedActions = AllowedActions.Identity
|
|
60
74
|
|
|
75
|
+
class Notifications:
|
|
76
|
+
Name = 'notifications'
|
|
77
|
+
ResourceTypes = ResourceTypes.Notifications
|
|
78
|
+
AllowedActions = AllowedActions.Notifications
|
|
79
|
+
|
|
61
80
|
IDENTITY = Identity.Name
|
|
62
81
|
MERCUTO = Mercuto.Name
|
|
82
|
+
NOTIFICATIONS = Notifications.Name
|
|
63
83
|
WILDCARD = '*'
|
|
64
84
|
|
|
65
85
|
|
mercuto_client/client.py
CHANGED
|
@@ -3,7 +3,8 @@ import json as json_stdlib
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
import time
|
|
6
|
-
from typing import Any, Iterator, Mapping, Optional, Protocol, Type,
|
|
6
|
+
from typing import (Any, Iterator, Literal, Mapping, Optional, Protocol, Type,
|
|
7
|
+
TypeVar)
|
|
7
8
|
|
|
8
9
|
import requests
|
|
9
10
|
import requests.cookies
|
|
@@ -15,6 +16,9 @@ from .modules.core import MercutoCoreService
|
|
|
15
16
|
from .modules.data import MercutoDataService
|
|
16
17
|
from .modules.fatigue import MercutoFatigueService
|
|
17
18
|
from .modules.identity import MercutoIdentityService
|
|
19
|
+
from .modules.media import MercutoMediaService
|
|
20
|
+
from .modules.notifications import MercutoNotificationService
|
|
21
|
+
from .modules.reports import MercutoReportService
|
|
18
22
|
|
|
19
23
|
logger = logging.getLogger(__name__)
|
|
20
24
|
|
|
@@ -36,11 +40,11 @@ class MercutoClient:
|
|
|
36
40
|
if url.endswith('/'):
|
|
37
41
|
url = url[:-1]
|
|
38
42
|
|
|
39
|
-
if not url.startswith('https://'):
|
|
43
|
+
if verify_ssl and not url.startswith('https://'):
|
|
40
44
|
raise ValueError(f'Url must be https, is {url}')
|
|
41
45
|
|
|
42
46
|
self._url = url
|
|
43
|
-
self.
|
|
47
|
+
self.verify_ssl = verify_ssl
|
|
44
48
|
|
|
45
49
|
if active_session is None:
|
|
46
50
|
self._current_session = requests.Session()
|
|
@@ -63,11 +67,11 @@ class MercutoClient:
|
|
|
63
67
|
raise MercutoClientException("No credentials set")
|
|
64
68
|
return self._auth_method.unique_key()
|
|
65
69
|
|
|
66
|
-
def
|
|
67
|
-
self.
|
|
70
|
+
def setverify_ssl(self, verify_ssl: bool) -> None:
|
|
71
|
+
self.verify_ssl = verify_ssl
|
|
68
72
|
|
|
69
73
|
def copy(self) -> 'MercutoClient':
|
|
70
|
-
return MercutoClient(self._url, self.
|
|
74
|
+
return MercutoClient(self._url, self.verify_ssl, self._current_session)
|
|
71
75
|
|
|
72
76
|
@contextlib.contextmanager
|
|
73
77
|
def as_credentials(self, api_key: Optional[str] = None,
|
|
@@ -78,7 +82,7 @@ class MercutoClient:
|
|
|
78
82
|
Same as .connect(), but as a context manager. Will automatically logout when exiting the context.
|
|
79
83
|
"""
|
|
80
84
|
# TODO: We are passing the current session along to re-use connections for speed. Will this cause security issues?
|
|
81
|
-
other = MercutoClient(self._url, self.
|
|
85
|
+
other = MercutoClient(self._url, self.verify_ssl, self._current_session)
|
|
82
86
|
try:
|
|
83
87
|
yield other.connect(api_key=api_key, service_token=service_token, bearer_token=bearer_token, headers=headers)
|
|
84
88
|
finally:
|
|
@@ -108,6 +112,26 @@ class MercutoClient:
|
|
|
108
112
|
base.update(headers)
|
|
109
113
|
return base
|
|
110
114
|
|
|
115
|
+
def session(self) -> requests.Session:
|
|
116
|
+
return self._current_session
|
|
117
|
+
|
|
118
|
+
def request(self, url: str, method: Literal['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'],
|
|
119
|
+
params: Optional[dict[str, Any]] = None,
|
|
120
|
+
json: Optional[dict[str, Any]] = None,
|
|
121
|
+
raise_for_status: bool = True,
|
|
122
|
+
**kwargs: Any) -> requests.Response:
|
|
123
|
+
"""
|
|
124
|
+
Make an HTTP request to the Mercuto API.
|
|
125
|
+
:param url: The URL path (relative to the base API URL) to make the request to.
|
|
126
|
+
:param method: The HTTP method to use (e.g., 'GET', 'POST', etc.).
|
|
127
|
+
:param params: Optional dictionary of query parameters to include in the request.
|
|
128
|
+
:param json: Optional dictionary to send as a JSON payload in the request body.
|
|
129
|
+
:param raise_for_status: Whether to raise an exception for HTTP error responses.
|
|
130
|
+
:param kwargs: Additional keyword arguments to pass to the requests method.
|
|
131
|
+
:return: The HTTP response object.
|
|
132
|
+
"""
|
|
133
|
+
return self._http_request(url, method, params=params, json=json, raise_for_status=raise_for_status, **kwargs)
|
|
134
|
+
|
|
111
135
|
def _http_request(self, url: str, method: str,
|
|
112
136
|
params: Optional[dict[str, Any]] = None,
|
|
113
137
|
json: Optional[dict[str, Any]] = None,
|
|
@@ -122,7 +146,7 @@ class MercutoClient:
|
|
|
122
146
|
kwargs['headers'] = self._update_headers(kwargs.get('headers', {}))
|
|
123
147
|
|
|
124
148
|
if 'verify' not in kwargs:
|
|
125
|
-
kwargs['verify'] = self.
|
|
149
|
+
kwargs['verify'] = self.verify_ssl
|
|
126
150
|
|
|
127
151
|
if 'cookies' not in kwargs:
|
|
128
152
|
kwargs['cookies'] = self._cookies
|
|
@@ -167,6 +191,15 @@ class MercutoClient:
|
|
|
167
191
|
def core(self) -> 'MercutoCoreService':
|
|
168
192
|
return self._add_and_fetch_module('core', MercutoCoreService)
|
|
169
193
|
|
|
194
|
+
def media(self) -> 'MercutoMediaService':
|
|
195
|
+
return self._add_and_fetch_module('media', MercutoMediaService)
|
|
196
|
+
|
|
197
|
+
def reports(self) -> 'MercutoReportService':
|
|
198
|
+
return self._add_and_fetch_module('reports', MercutoReportService)
|
|
199
|
+
|
|
200
|
+
def notifications(self) -> 'MercutoNotificationService':
|
|
201
|
+
return self._add_and_fetch_module('notifications', MercutoNotificationService)
|
|
202
|
+
|
|
170
203
|
def login(self, authentication: IAuthenticationMethod) -> None:
|
|
171
204
|
self._auth_method = authentication
|
|
172
205
|
|
|
@@ -40,9 +40,11 @@ if __name__ == '__main__':
|
|
|
40
40
|
help='Enable verbose output')
|
|
41
41
|
parser.add_argument('-d', '--directory', type=str,
|
|
42
42
|
help='Directory to store ingested files. Default is a directory called `buffered-files` in the workdir.')
|
|
43
|
-
parser.add_argument('-s', '--
|
|
44
|
-
help='Size in MB for total amount of
|
|
45
|
-
Default is
|
|
43
|
+
parser.add_argument('-s', '--target-free-space-mb', type=int,
|
|
44
|
+
help='Size in MB for total amount of remaining free space to keep available. \
|
|
45
|
+
Default is 25% of the available disk space on the buffer partition excluding the directory itself', default=None)
|
|
46
|
+
parser.add_argument('--max-files', type=int,
|
|
47
|
+
help='Maximum number of files to keep in the buffer. Default is to use the size param.', default=None)
|
|
46
48
|
parser.add_argument('--max-attempts', type=int,
|
|
47
49
|
help='Maximum number of attempts to process a file before giving up. Default is 1000.',
|
|
48
50
|
default=1000)
|
|
@@ -113,10 +115,10 @@ if __name__ == '__main__':
|
|
|
113
115
|
ftp_dir = os.path.join(workdir, 'temp-ftp-data')
|
|
114
116
|
os.makedirs(ftp_dir, exist_ok=True)
|
|
115
117
|
|
|
116
|
-
|
|
117
|
-
if
|
|
118
|
-
|
|
119
|
-
logging.info(f"
|
|
118
|
+
target_free_space_mb = args.target_free_space_mb
|
|
119
|
+
if target_free_space_mb is None and args.max_files is None:
|
|
120
|
+
target_free_space_mb = get_free_space_excluding_files(buffer_directory) * 0.25 // (1024 * 1024) # Convert to MB
|
|
121
|
+
logging.info(f"Target remaining free space set to {target_free_space_mb} MB based on available disk space.")
|
|
120
122
|
|
|
121
123
|
if args.mapping is not None:
|
|
122
124
|
import json
|
|
@@ -146,7 +148,8 @@ if __name__ == '__main__':
|
|
|
146
148
|
db_path=database_path,
|
|
147
149
|
process_callback=ingester.process_file,
|
|
148
150
|
max_attempts=args.max_attempts,
|
|
149
|
-
|
|
151
|
+
target_free_space_mb=target_free_space_mb,
|
|
152
|
+
max_files=args.max_files)
|
|
150
153
|
|
|
151
154
|
processor.scan_existing_files()
|
|
152
155
|
|
|
@@ -8,6 +8,22 @@ from typing import Callable, Optional
|
|
|
8
8
|
logger = logging.getLogger(__name__)
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
def _default_standard_clock(_: str):
|
|
12
|
+
# Default clock function to get current time in seconds since epoch
|
|
13
|
+
return datetime.now(timezone.utc).timestamp()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _default_file_clock(filepath: str) -> float:
|
|
17
|
+
# Default clock function to get the file's ctime in seconds since epoch (creation time on Windows, last metadata change time on Unix)
|
|
18
|
+
return os.path.getctime(filepath)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _default_free_space_checker(buffer_dir: str) -> float:
|
|
22
|
+
"""Returns the free space in MB on the partition where the buffer directory is located."""
|
|
23
|
+
_, _, free = shutil.disk_usage(buffer_dir)
|
|
24
|
+
return free / (1024 * 1024)
|
|
25
|
+
|
|
26
|
+
|
|
11
27
|
class FileProcessor:
|
|
12
28
|
"""
|
|
13
29
|
System for processing files in a strict order with retry logic.
|
|
@@ -20,11 +36,14 @@ class FileProcessor:
|
|
|
20
36
|
:param process_callback: Callable that processes a file. Should return True if processing is successful
|
|
21
37
|
:param max_attempts: Maximum number of attempts to process a file before marking it as failed.
|
|
22
38
|
:param max_files: Maximum number of files to keep in the buffer directory. If None, no limit is enforced.
|
|
23
|
-
:param
|
|
39
|
+
:param target_free_space_mb: Optional minimum free space in MB to keep on the partition where the buffer directory is located.
|
|
24
40
|
This is combined with max_files to determine when to delete old files and takes precedence over max_files.
|
|
25
41
|
:param clock: Optional callable that returns the timestamp for the file based on the filename. Takes in the file name as an argument
|
|
26
42
|
and should return a float representing the timestamp in seconds since the epoch. Defaults to datetime.now().timestamp().
|
|
27
43
|
This clock is NOT used when scanning existing files, it takes its own clock function.
|
|
44
|
+
:param free_space_checker: Optional callable that returns the free space in MB on the partition where the buffer directory is located.
|
|
45
|
+
Takes in the buffer directory path as an argument and should return a float representing the free space in MB.
|
|
46
|
+
Defaults to checking the actual free space on the disk.
|
|
28
47
|
|
|
29
48
|
|
|
30
49
|
Provides a callback for processing files, which should return True if successful.
|
|
@@ -50,20 +69,18 @@ class FileProcessor:
|
|
|
50
69
|
process_callback: Callable[[str], bool],
|
|
51
70
|
max_attempts: int,
|
|
52
71
|
max_files: Optional[int] = None,
|
|
53
|
-
|
|
54
|
-
clock: Optional[Callable[[str], float]] = None
|
|
72
|
+
target_free_space_mb: Optional[float] = None,
|
|
73
|
+
clock: Optional[Callable[[str], float]] = None,
|
|
74
|
+
free_space_checker: Optional[Callable[[str], float]] = None
|
|
55
75
|
) -> None:
|
|
56
76
|
self._buffer_dir = buffer_dir
|
|
57
77
|
self._db_path = db_path
|
|
58
78
|
self._max_files = max_files
|
|
59
79
|
self._max_attempts = max_attempts
|
|
60
80
|
self._process_callback = process_callback
|
|
61
|
-
self.
|
|
62
|
-
if clock is None
|
|
63
|
-
|
|
64
|
-
def clock(_): return datetime.now(timezone.utc).timestamp()
|
|
65
|
-
self._clock = clock
|
|
66
|
-
|
|
81
|
+
self._target_free_space_mb = target_free_space_mb
|
|
82
|
+
self._clock = clock if clock is not None else _default_standard_clock
|
|
83
|
+
self._free_space_checker = free_space_checker if free_space_checker is not None else _default_free_space_checker
|
|
67
84
|
os.makedirs(self._buffer_dir, exist_ok=True)
|
|
68
85
|
self._init_db()
|
|
69
86
|
|
|
@@ -102,9 +119,7 @@ class FileProcessor:
|
|
|
102
119
|
|
|
103
120
|
"""
|
|
104
121
|
if clock is None:
|
|
105
|
-
|
|
106
|
-
def clock(filepath: str) -> float:
|
|
107
|
-
return os.path.getctime(filepath)
|
|
122
|
+
clock = _default_file_clock
|
|
108
123
|
|
|
109
124
|
conn = sqlite3.connect(self._db_path)
|
|
110
125
|
cursor = conn.cursor()
|
|
@@ -216,38 +231,42 @@ class FileProcessor:
|
|
|
216
231
|
conn.close()
|
|
217
232
|
|
|
218
233
|
for (filepath,) in reversed(files_to_delete):
|
|
219
|
-
|
|
234
|
+
conn = sqlite3.connect(self._db_path)
|
|
235
|
+
cursor = conn.cursor()
|
|
236
|
+
cursor.execute(
|
|
237
|
+
"DELETE FROM file_buffer WHERE filepath = ?", (filepath,))
|
|
238
|
+
conn.commit()
|
|
239
|
+
conn.close()
|
|
240
|
+
try:
|
|
220
241
|
os.remove(filepath)
|
|
221
|
-
conn = sqlite3.connect(self._db_path)
|
|
222
|
-
cursor = conn.cursor()
|
|
223
|
-
cursor.execute(
|
|
224
|
-
"DELETE FROM file_buffer WHERE filepath = ?", (filepath,))
|
|
225
|
-
conn.commit()
|
|
226
|
-
conn.close()
|
|
227
242
|
logger.info(f"Deleted old file {filepath}")
|
|
243
|
+
except FileNotFoundError:
|
|
244
|
+
logger.warning(f"File {filepath} does not exist for deletion.")
|
|
245
|
+
except Exception as e:
|
|
246
|
+
logger.error(f"Error deleting file {filepath}: {e}")
|
|
228
247
|
|
|
229
248
|
def cleanup_old_files(self) -> None:
|
|
230
249
|
"""Remove old files based on max_files and free space."""
|
|
231
250
|
if self._max_files is not None:
|
|
232
251
|
self.cleanup_old_files_with_max_files()
|
|
233
252
|
|
|
234
|
-
if self.
|
|
253
|
+
if self._target_free_space_mb is not None:
|
|
235
254
|
self.cleanup_old_files_with_free_space()
|
|
236
255
|
|
|
237
256
|
def cleanup_old_files_with_free_space(self) -> None:
|
|
238
257
|
"""Remove old files ensuring free space is maintained."""
|
|
239
|
-
if self.
|
|
258
|
+
if self._target_free_space_mb is None:
|
|
240
259
|
return
|
|
241
260
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
total, used, free = shutil.disk_usage(self._buffer_dir)
|
|
245
|
-
return free / (1024 * 1024)
|
|
246
|
-
|
|
247
|
-
while free_space_mb() < self._free_space_mb:
|
|
261
|
+
count = 0
|
|
262
|
+
while self._free_space_checker(self._buffer_dir) < self._target_free_space_mb:
|
|
248
263
|
if not self._delete_oldest_file():
|
|
249
264
|
logger.warning("No more files to delete to free up space.")
|
|
250
265
|
break
|
|
266
|
+
count += 1
|
|
267
|
+
if count > 1000:
|
|
268
|
+
logger.error("Too many iterations trying to free space, aborting.")
|
|
269
|
+
break
|
|
251
270
|
|
|
252
271
|
def _delete_oldest_file(self) -> bool:
|
|
253
272
|
"""Deletes the oldest file in the buffer directory."""
|
|
@@ -258,25 +277,26 @@ class FileProcessor:
|
|
|
258
277
|
oldest_file: Optional[tuple[str]] = cursor.fetchone()
|
|
259
278
|
if not oldest_file:
|
|
260
279
|
logger.info("No files to delete.")
|
|
280
|
+
conn.close()
|
|
261
281
|
return False
|
|
262
282
|
|
|
263
283
|
filepath = oldest_file[0]
|
|
264
|
-
|
|
265
|
-
|
|
284
|
+
cursor.execute(
|
|
285
|
+
"DELETE FROM file_buffer WHERE filepath = ?", (filepath,))
|
|
286
|
+
conn.commit()
|
|
287
|
+
conn.close()
|
|
288
|
+
try:
|
|
266
289
|
os.remove(filepath)
|
|
267
|
-
cursor.execute(
|
|
268
|
-
"DELETE FROM file_buffer WHERE filepath = ?", (filepath,))
|
|
269
|
-
conn.commit()
|
|
270
|
-
removed = True
|
|
271
290
|
logger.info(f"Deleted oldest file {filepath}")
|
|
272
|
-
|
|
291
|
+
except FileNotFoundError:
|
|
273
292
|
logger.warning(f"Oldest file {filepath} does not exist.")
|
|
293
|
+
except Exception as e:
|
|
294
|
+
logger.error(f"Error deleting oldest file {filepath}: {e}")
|
|
274
295
|
|
|
275
|
-
|
|
276
|
-
return removed
|
|
296
|
+
return True
|
|
277
297
|
|
|
278
298
|
def add_file_to_db(self, filepath: str) -> None:
|
|
279
|
-
"""Adds a new file to database
|
|
299
|
+
"""Adds a new file to database to be processed on the next call to process_next_file."""
|
|
280
300
|
timestamp = self._clock(filepath)
|
|
281
301
|
filename: str = os.path.basename(filepath)
|
|
282
302
|
conn = sqlite3.connect(self._db_path)
|
mercuto_client/mocks/__init__.py
CHANGED
|
@@ -10,6 +10,9 @@ from ..client import MercutoClient
|
|
|
10
10
|
def mock_mercuto(data: bool = True,
|
|
11
11
|
identity: bool = True,
|
|
12
12
|
fatigue: bool = True,
|
|
13
|
+
core: bool = True,
|
|
14
|
+
media: bool = True,
|
|
15
|
+
notifications: bool = True,
|
|
13
16
|
verify_service_token: Optional[Callable[[str], VerifyMyPermissions]] = None) -> Iterator[None]:
|
|
14
17
|
"""
|
|
15
18
|
While this context is active, all calls to MercutoClient will use mocked services.
|
|
@@ -23,12 +26,40 @@ def mock_mercuto(data: bool = True,
|
|
|
23
26
|
if data:
|
|
24
27
|
stack.enter_context(mock_data_module())
|
|
25
28
|
if identity:
|
|
26
|
-
stack.enter_context(mock_identity_module(
|
|
29
|
+
stack.enter_context(mock_identity_module(
|
|
30
|
+
verify_service_token=verify_service_token))
|
|
27
31
|
if fatigue:
|
|
28
32
|
stack.enter_context(mock_fatigue_module())
|
|
33
|
+
if core:
|
|
34
|
+
stack.enter_context(mock_core_module())
|
|
35
|
+
if media:
|
|
36
|
+
stack.enter_context(mock_media_module())
|
|
37
|
+
if notifications:
|
|
38
|
+
stack.enter_context(mock_notifications_module())
|
|
29
39
|
yield
|
|
30
40
|
|
|
31
41
|
|
|
42
|
+
@contextlib.contextmanager
|
|
43
|
+
def mock_core_module() -> Iterator[None]:
|
|
44
|
+
from .mock_core import MockMercutoCoreService
|
|
45
|
+
original = MercutoClient.core
|
|
46
|
+
|
|
47
|
+
_cache: Optional[MockMercutoCoreService] = None
|
|
48
|
+
|
|
49
|
+
def stub(self: MercutoClient) -> MockMercutoCoreService:
|
|
50
|
+
nonlocal _cache
|
|
51
|
+
if _cache is None:
|
|
52
|
+
_cache = MockMercutoCoreService(self)
|
|
53
|
+
_cache._client = self
|
|
54
|
+
return _cache
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
setattr(MercutoClient, 'core', stub)
|
|
58
|
+
yield
|
|
59
|
+
finally:
|
|
60
|
+
setattr(MercutoClient, 'core', original)
|
|
61
|
+
|
|
62
|
+
|
|
32
63
|
@contextlib.contextmanager
|
|
33
64
|
def mock_data_module() -> Iterator[None]:
|
|
34
65
|
from .mock_data import MockMercutoDataService
|
|
@@ -60,7 +91,8 @@ def mock_identity_module(verify_service_token: Optional[Callable[[str], VerifyMy
|
|
|
60
91
|
def stub(self: MercutoClient) -> MockMercutoIdentityService:
|
|
61
92
|
nonlocal _cache
|
|
62
93
|
if _cache is None:
|
|
63
|
-
_cache = MockMercutoIdentityService(
|
|
94
|
+
_cache = MockMercutoIdentityService(
|
|
95
|
+
self, verify_service_token=verify_service_token)
|
|
64
96
|
_cache._client = self
|
|
65
97
|
return _cache
|
|
66
98
|
|
|
@@ -90,3 +122,45 @@ def mock_fatigue_module() -> Iterator[None]:
|
|
|
90
122
|
yield
|
|
91
123
|
finally:
|
|
92
124
|
setattr(MercutoClient, 'fatigue', original)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@contextlib.contextmanager
|
|
128
|
+
def mock_media_module() -> Iterator[None]:
|
|
129
|
+
from .mock_media import MockMercutoMediaService
|
|
130
|
+
original = MercutoClient.media
|
|
131
|
+
|
|
132
|
+
_cache: Optional[MockMercutoMediaService] = None
|
|
133
|
+
|
|
134
|
+
def stub(self: MercutoClient) -> MockMercutoMediaService:
|
|
135
|
+
nonlocal _cache
|
|
136
|
+
if _cache is None:
|
|
137
|
+
_cache = MockMercutoMediaService(self)
|
|
138
|
+
_cache._client = self
|
|
139
|
+
return _cache
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
setattr(MercutoClient, 'media', stub)
|
|
143
|
+
yield
|
|
144
|
+
finally:
|
|
145
|
+
setattr(MercutoClient, 'media', original)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@contextlib.contextmanager
|
|
149
|
+
def mock_notifications_module() -> Iterator[None]:
|
|
150
|
+
from .mock_notifications import MockMercutoNotificationService
|
|
151
|
+
original = MercutoClient.notifications
|
|
152
|
+
|
|
153
|
+
_cache: Optional[MockMercutoNotificationService] = None
|
|
154
|
+
|
|
155
|
+
def stub(self: MercutoClient) -> MockMercutoNotificationService:
|
|
156
|
+
nonlocal _cache
|
|
157
|
+
if _cache is None:
|
|
158
|
+
_cache = MockMercutoNotificationService(self)
|
|
159
|
+
_cache._client = self
|
|
160
|
+
return _cache
|
|
161
|
+
|
|
162
|
+
try:
|
|
163
|
+
setattr(MercutoClient, 'notifications', stub)
|
|
164
|
+
yield
|
|
165
|
+
finally:
|
|
166
|
+
setattr(MercutoClient, 'notifications', original)
|