gooddata-pipelines 1.50.0__py3-none-any.whl → 1.50.1.dev1__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release: this version of gooddata-pipelines might be problematic; see the registry listing for details.
- gooddata_pipelines/backup_and_restore/backup_manager.py +36 -62
- gooddata_pipelines/backup_and_restore/constants.py +3 -7
- gooddata_pipelines/backup_and_restore/models/storage.py +4 -5
- gooddata_pipelines/utils/__init__.py +9 -0
- gooddata_pipelines/utils/rate_limiter.py +64 -0
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/METADATA +2 -2
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/RECORD +9 -7
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/WHEEL +0 -0
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/licenses/LICENSE.txt +0 -0
gooddata_pipelines/backup_and_restore/backup_manager.py

@@ -4,10 +4,8 @@ import json
 import os
 import shutil
 import tempfile
-import threading
 import time
 import traceback
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Type
@@ -39,6 +37,7 @@ from gooddata_pipelines.backup_and_restore.storage.s3_storage import (
     S3Storage,
 )
 from gooddata_pipelines.logger import LogObserver
+from gooddata_pipelines.utils.rate_limiter import RateLimiter
 
 
 @dataclass
@@ -60,6 +59,10 @@ class BackupManager:
 
         self.loader = BackupInputProcessor(self._api, self.config.api_page_size)
 
+        self._api_rate_limiter = RateLimiter(
+            calls_per_second=self.config.api_calls_per_second,
+        )
+
     @classmethod
     def create(
         cls: Type["BackupManager"],
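The limiter wired in here spaces API calls by at least 1 / calls_per_second seconds. A minimal arithmetic sketch (the rate value is illustrative; the package default, shown later in this diff, is 1.0):

    calls_per_second = 2.0                 # illustrative value, not the default
    min_interval = 1.0 / calls_per_second
    assert min_interval == 0.5             # at most one API call per 0.5 s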
@@ -95,11 +98,12 @@ class BackupManager:
 
     def get_user_data_filters(self, ws_id: str) -> dict:
         """Returns the user data filters for the specified workspace."""
-        response: requests.Response = self._api.get_user_data_filters(ws_id)
-        if response.ok:
-            return response.json()
-        else:
-            raise RuntimeError(f"{response.status_code}: {response.text}")
+        with self._api_rate_limiter:
+            response: requests.Response = self._api.get_user_data_filters(ws_id)
+        if response.ok:
+            return response.json()
+        else:
+            raise RuntimeError(f"{response.status_code}: {response.text}")
 
     def _store_user_data_filters(
         self,
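Note the shape of the rewrite: only the HTTP request sits inside the `with` block, and since RateLimiter's `__exit__` (shown later in this diff) does no work beyond returning False, the block simply delays entry and lets exceptions propagate. A hedged sketch of the same pattern, where `limiter` and `fetch` are placeholders rather than names from this package:

    def rate_limited_fetch(limiter, fetch):
        with limiter:              # wait_if_needed() runs on entry; exit is a no-op
            response = fetch()
        if response.ok:            # response handling happens outside the limiter
            return response.json()
        raise RuntimeError(f"{response.status_code}: {response.text}")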
@@ -144,14 +148,17 @@ class BackupManager:
 
     def _get_automations_from_api(self, workspace_id: str) -> Any:
         """Returns automations for the workspace as JSON."""
-        response: requests.Response = self._api.get_automations(workspace_id)
-        if response.ok:
-            return response.json()
-        else:
-            raise RuntimeError(
-                f"Failed to get automations for {workspace_id}. "
-                + f"{response.status_code}: {response.text}"
+        with self._api_rate_limiter:
+            response: requests.Response = self._api.get_automations(
+                workspace_id
             )
+        if response.ok:
+            return response.json()
+        else:
+            raise RuntimeError(
+                f"Failed to get automations for {workspace_id}. "
+                + f"{response.status_code}: {response.text}"
+            )
 
     def _store_automations(self, export_path: Path, workspace_id: str) -> None:
         """Stores the automations in the specified export path."""
@@ -183,7 +190,8 @@ class BackupManager:
     ) -> None:
         """Stores the filter views in the specified export path."""
         # Get the filter views YAML files from the API
-        self._api.store_declarative_filter_views(workspace_id, export_path)
+        with self._api_rate_limiter:
+            self._api.store_declarative_filter_views(workspace_id, export_path)
 
         # Move filter views to the subfolder containing the analytics model
         self._move_folder(
@@ -231,7 +239,10 @@ class BackupManager:
         # the SDK. That way we could save and package all the declarations
         # directly instead of reorganizing the folder structures. That should
         # be more transparent/readable and possibly safer for threading
-        self._api.store_declarative_workspace(workspace_id, export_path)
+        with self._api_rate_limiter:
+            self._api.store_declarative_workspace(
+                workspace_id, export_path
+            )
         self.store_declarative_filter_views(export_path, workspace_id)
         self._store_automations(export_path, workspace_id)
 
@@ -291,7 +302,6 @@ class BackupManager:
     def _process_batch(
         self,
         batch: BackupBatch,
-        stop_event: threading.Event,
         retry_count: int = 0,
     ) -> None:
         """Processes a single batch of workspaces for backup.
@@ -299,10 +309,6 @@ class BackupManager:
         and retry with exponential backoff up to BackupSettings.MAX_RETRIES.
         The base wait time is defined by BackupSettings.RETRY_DELAY.
         """
-        if stop_event.is_set():
-            # If the stop_event flag is set, return. This will terminate the thread
-            return
-
         try:
             with tempfile.TemporaryDirectory() as tmpdir:
                 self._get_workspace_export(tmpdir, batch.list_of_ids)
@@ -314,10 +320,7 @@ class BackupManager:
                 self.storage.export(tmpdir, self.org_id)
 
         except Exception as e:
-            if stop_event.is_set():
-                return
-
-            elif retry_count < BackupSettings.MAX_RETRIES:
+            if retry_count < BackupSettings.MAX_RETRIES:
                 # Retry with exponential backoff until MAX_RETRIES
                 next_retry = retry_count + 1
                 wait_time = BackupSettings.RETRY_DELAY**next_retry
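With the constants defined in this package (RETRY_DELAY = 5, MAX_RETRIES = 3), `RETRY_DELAY**next_retry` produces waits of 5, 25, and 125 seconds before a batch is given up on. A quick check:

    RETRY_DELAY, MAX_RETRIES = 5, 3
    waits = [RETRY_DELAY**n for n in range(1, MAX_RETRIES + 1)]
    assert waits == [5, 25, 125]  # seconds slept before retries 1, 2, 3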
@@ -328,52 +331,23 @@ class BackupManager:
             )
 
             time.sleep(wait_time)
-            self._process_batch(batch, stop_event, next_retry)
+            self._process_batch(batch, next_retry)
         else:
             # If the batch fails after MAX_RETRIES, raise the error
             self.logger.error(f"Batch failed: {e.__class__.__name__}: {e}")
             raise
 
-    def
+    def _process_batches(
         self,
         batches: list[BackupBatch],
     ) -> None:
         """
-        Processes batches
-
+        Processes batches sequentially to avoid overloading the API.
+        If any batch fails, the processing will stop.
         """
-
-        stop_event = threading.Event()
-
-
-        with ThreadPoolExecutor(
-            max_workers=self.config.max_workers
-        ) as executor:
-            # Set the futures tasks.
-            futures = []
-            for batch in batches:
-                futures.append(
-                    executor.submit(
-                        self._process_batch,
-                        batch,
-                        stop_event,
-                    )
-                )
-
-            # Process futures as they complete
-            for future in as_completed(futures):
-                try:
-                    future.result()
-                except Exception:
-                    # On failure, set the flag to True - signal running processes to stop
-                    stop_event.set()
-
-                    # Cancel unstarted threads
-                    for f in futures:
-                        if not f.done():
-                            f.cancel()
-
-                    raise
+        for i, batch in enumerate(batches, 1):
+            self.logger.info(f"Processing batch {i}/{len(batches)}...")
+            self._process_batch(batch)
 
     def backup_workspaces(
         self,
@@ -440,7 +414,7 @@ class BackupManager:
                 f"Exporting {len(workspaces_to_export)} workspaces in {len(batches)} batches."
             )
 
-            self.
+            self._process_batches(batches)
 
             self.logger.info("Backup completed")
         except Exception as e:
gooddata_pipelines/backup_and_restore/constants.py

@@ -21,19 +21,15 @@ class DirNames:
     UDF = "user_data_filters"
 
 
-@dataclass(frozen=True)
-class ConcurrencyDefaults:
-    MAX_WORKERS = 1
-    DEFAULT_BATCH_SIZE = 100
-
-
 @dataclass(frozen=True)
 class ApiDefaults:
     DEFAULT_PAGE_SIZE = 100
+    DEFAULT_BATCH_SIZE = 100
+    DEFAULT_API_CALLS_PER_SECOND = 1.0
 
 
 @dataclass(frozen=True)
-class BackupSettings(
+class BackupSettings(ApiDefaults):
     MAX_RETRIES = 3
     RETRY_DELAY = 5  # seconds
     TIMESTAMP_SDK_FOLDER = (
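Because BackupSettings now subclasses ApiDefaults, callers can read every default off the one settings class; a minimal check, assuming only the attributes shown above:

    # assumes: from gooddata_pipelines.backup_and_restore.constants import BackupSettings
    assert BackupSettings.DEFAULT_PAGE_SIZE == 100              # inherited from ApiDefaults
    assert BackupSettings.DEFAULT_API_CALLS_PER_SECOND == 1.0   # inherited from ApiDefaults
    assert BackupSettings.MAX_RETRIES == 3                      # defined on BackupSettings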
gooddata_pipelines/backup_and_restore/models/storage.py

@@ -83,14 +83,13 @@ class BackupRestoreConfig(BaseModel):
             description="Batch size must be greater than 0",
         ),
     ] = Field(default=BackupSettings.DEFAULT_BATCH_SIZE)
-    max_workers: Annotated[
-        int,
+    api_calls_per_second: Annotated[
+        float,
         Field(
             gt=0,
-            lt=3,
-            description="Max workers must be greater than 0 and less than 3",
+            description="Maximum API calls per second (rate limiting)",
         ),
-    ] = Field(default=BackupSettings.MAX_WORKERS)
+    ] = Field(default=BackupSettings.DEFAULT_API_CALLS_PER_SECOND)
 
     @classmethod
     def from_yaml(cls, conf_path: str) -> "BackupRestoreConfig":
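The `gt=0` constraint rejects a zero or negative rate at config-validation time rather than at runtime. A standalone sketch of the same pydantic v2 pattern (a stand-in model, not the package's actual BackupRestoreConfig, which has more fields):

    from typing import Annotated

    from pydantic import BaseModel, Field, ValidationError

    class _Config(BaseModel):  # illustrative stand-in
        api_calls_per_second: Annotated[float, Field(gt=0)] = 1.0

    try:
        _Config(api_calls_per_second=0)      # violates gt=0
    except ValidationError as exc:
        print(exc.errors()[0]["type"])       # -> "greater_than"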
gooddata_pipelines/utils/rate_limiter.py (new file)

@@ -0,0 +1,64 @@
+# (C) 2025 GoodData Corporation
+
+import time
+import threading
+import functools
+from typing import Callable, Any, Literal
+
+
+class RateLimiter:
+    """
+    Rate limiter usable as a decorator and as a context manager.
+    - Shared instance decorator: limiter = RateLimiter(); @limiter
+    - Per-function decorator: @RateLimiter(calls_per_second=2)
+    - Context manager: with RateLimiter(2): ...
+    """
+
+    def __init__(self, calls_per_second: float = 1.0) -> None:
+        if calls_per_second <= 0:
+            raise ValueError("calls_per_second must be greater than 0")
+
+        self.calls_per_second = calls_per_second
+        self.min_interval = 1.0 / calls_per_second
+
+        self._lock = threading.Lock()
+        self._last_call_time = 0.0
+
+    def wait_if_needed(self) -> float:
+        """Sleep if needed to maintain the rate limit, return actual sleep time."""
+        with self._lock:
+            now = time.monotonic()
+            since_last = now - self._last_call_time
+
+            if since_last < self.min_interval:
+                sleep_time = self.min_interval - since_last
+                time.sleep(sleep_time)
+                self._last_call_time = time.monotonic()
+                return sleep_time
+            else:
+                self._last_call_time = now
+                return 0.0
+
+    # Decorator support
+    def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
+        @functools.wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
+            self.wait_if_needed()
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    # Context manager support
+    def __enter__(self) -> "RateLimiter":
+        self.wait_if_needed()
+        return self
+
+    def __exit__(
+        self, exc_type: Any, exc_val: Any, exc_tb: Any
+    ) -> Literal[False]:
+        return False
+
+    def reset(self) -> None:
+        """Reset the limiter (useful in tests)."""
+        with self._lock:
+            self._last_call_time = 0.0
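As the class docstring advertises, one instance serves both as a decorator and as a context manager; a small usage sketch (import path taken from this diff, timing comments approximate):

    from gooddata_pipelines.utils.rate_limiter import RateLimiter

    limiter = RateLimiter(calls_per_second=2)  # at most one call per 0.5 s

    @limiter  # decorator form: every call waits its turn
    def ping() -> None:
        print("ping")

    ping()
    ping()  # blocks roughly 0.5 s inside wait_if_needed()

    with limiter:  # context-manager form: delays entry the same way
        print("pong")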
{gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gooddata-pipelines
-Version: 1.50.0
+Version: 1.50.1.dev1
 Summary: GoodData Cloud lifecycle automation pipelines
 Author-email: GoodData <support@gooddata.com>
 License: MIT
@@ -8,7 +8,7 @@ License-File: LICENSE.txt
 Requires-Python: >=3.10
 Requires-Dist: boto3-stubs<2.0.0,>=1.39.3
 Requires-Dist: boto3<2.0.0,>=1.39.3
-Requires-Dist: gooddata-sdk~=1.50.0
+Requires-Dist: gooddata-sdk~=1.50.1.dev1
 Requires-Dist: pydantic<3.0.0,>=2.11.3
 Requires-Dist: requests<3.0.0,>=2.32.3
 Requires-Dist: types-pyyaml<7.0.0,>=6.0.12.20250326
{gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/RECORD

@@ -9,12 +9,12 @@ gooddata_pipelines/api/gooddata_sdk.py,sha256=wd5O4e9BQLWUawt6odrs5a51nqFGthBkvq
 gooddata_pipelines/api/utils.py,sha256=3QY_aYH17I9THoCINE3l-n5oj52k-gNeT1wv6Z_VxN8,1433
 gooddata_pipelines/backup_and_restore/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
 gooddata_pipelines/backup_and_restore/backup_input_processor.py,sha256=ex1tGwETdHDDBRJ_DGKZsZbH6uoRuOrbGbKOC976H5s,7940
-gooddata_pipelines/backup_and_restore/backup_manager.py,sha256=
-gooddata_pipelines/backup_and_restore/constants.py,sha256=
+gooddata_pipelines/backup_and_restore/backup_manager.py,sha256=kWxhxe5K8_oK3tz2e1RBMpyHHv18_UA_QVlXQeb7UUk,15227
+gooddata_pipelines/backup_and_restore/constants.py,sha256=m8wAYhVGlRlfAgiC_54wJr6N8HDEAT7hIfrH1N2UrQY,884
 gooddata_pipelines/backup_and_restore/csv_reader.py,sha256=0Kw7mJT7REj3Gjqfsc6YT9MbhcqfCGNB_SKBwzTI1rk,1268
 gooddata_pipelines/backup_and_restore/models/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
 gooddata_pipelines/backup_and_restore/models/input_type.py,sha256=CBKJigKdmZ-NJD9MSfNhq89bo86W0AqCMMoyonbd1QA,239
-gooddata_pipelines/backup_and_restore/models/storage.py,sha256=
+gooddata_pipelines/backup_and_restore/models/storage.py,sha256=BcgOGIk4u3EaH0u0gArDHQpDyIPjx_c3fmoc-i_Ptj4,2795
 gooddata_pipelines/backup_and_restore/models/workspace_response.py,sha256=eQbYLgRQc17IRG0yPTAJVrD-Xs05SzuwtzoNrPT2DoY,833
 gooddata_pipelines/backup_and_restore/storage/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
 gooddata_pipelines/backup_and_restore/storage/base_storage.py,sha256=67wdItlG3neExeb_eCUDQhswdUB62X5Nyj9sOImB_Hg,487
@@ -48,7 +48,9 @@ gooddata_pipelines/provisioning/utils/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pj
 gooddata_pipelines/provisioning/utils/context_objects.py,sha256=HJoeumH_gXwM6X-GO3HkC4w-6RYozz6-aqQOhDnu7no,879
 gooddata_pipelines/provisioning/utils/exceptions.py,sha256=1WnAOlPhqOf0xRcvn70lxAlLb8Oo6m6WCYS4hj9uzDU,3630
 gooddata_pipelines/provisioning/utils/utils.py,sha256=uF3k5hmoM5d6UoWWfPGCQgT_861zcU-ACyaQHHOOncY,2434
-gooddata_pipelines
-gooddata_pipelines
-gooddata_pipelines-1.50.
-gooddata_pipelines-1.50.
+gooddata_pipelines/utils/__init__.py,sha256=s9TtSjKqo1gSGWOVoGrXaGi1TsbRowjRDYKtjmKy7BY,155
+gooddata_pipelines/utils/rate_limiter.py,sha256=owbcEZhUxlTnE7rRHiWQ8XBC-vML2fVPbt41EeGEM7o,2002
+gooddata_pipelines-1.50.1.dev1.dist-info/METADATA,sha256=YcJNGu4zGfjvfMJlzUzm77G5rm0xubtl2-0MUB40jWI,3522
+gooddata_pipelines-1.50.1.dev1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+gooddata_pipelines-1.50.1.dev1.dist-info/licenses/LICENSE.txt,sha256=PNC7WXGIo6OKkNoPLRxlVrw6jaLcjSTUsSxy9Xcu9Jo,560365
+gooddata_pipelines-1.50.1.dev1.dist-info/RECORD,,
WHEEL and licenses/LICENSE.txt are unchanged; only their dist-info directory name changed with the version bump.