gooddata-pipelines 1.50.0__py3-none-any.whl → 1.50.1.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gooddata-pipelines might be problematic.
- gooddata_pipelines/__init__.py +18 -0
- gooddata_pipelines/api/gooddata_api.py +55 -0
- gooddata_pipelines/backup_and_restore/backup_manager.py +36 -62
- gooddata_pipelines/backup_and_restore/constants.py +3 -7
- gooddata_pipelines/backup_and_restore/models/storage.py +4 -5
- gooddata_pipelines/ldm_extension/__init__.py +1 -0
- gooddata_pipelines/ldm_extension/input_processor.py +286 -0
- gooddata_pipelines/ldm_extension/input_validator.py +185 -0
- gooddata_pipelines/ldm_extension/ldm_extension_manager.py +283 -0
- gooddata_pipelines/ldm_extension/models/__init__.py +1 -0
- gooddata_pipelines/ldm_extension/models/aliases.py +9 -0
- gooddata_pipelines/ldm_extension/models/analytical_object.py +33 -0
- gooddata_pipelines/ldm_extension/models/custom_data_object.py +90 -0
- gooddata_pipelines/provisioning/entities/users/models/users.py +10 -1
- gooddata_pipelines/provisioning/entities/users/users.py +38 -0
- gooddata_pipelines/provisioning/provisioning.py +2 -3
- gooddata_pipelines/utils/__init__.py +9 -0
- gooddata_pipelines/utils/rate_limiter.py +64 -0
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev2.dist-info}/METADATA +11 -3
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev2.dist-info}/RECORD +22 -12
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev2.dist-info}/WHEEL +0 -0
- {gooddata_pipelines-1.50.0.dist-info → gooddata_pipelines-1.50.1.dev2.dist-info}/licenses/LICENSE.txt +0 -0
gooddata_pipelines/__init__.py
CHANGED
@@ -6,11 +6,22 @@ from ._version import __version__
 from .backup_and_restore.backup_manager import BackupManager
 from .backup_and_restore.models.storage import (
     BackupRestoreConfig,
+    LocalStorageConfig,
+    S3StorageConfig,
     StorageType,
 )
 from .backup_and_restore.storage.local_storage import LocalStorage
 from .backup_and_restore.storage.s3_storage import S3Storage

+# -------- LDM Extension --------
+from .ldm_extension.ldm_extension_manager import LdmExtensionManager
+from .ldm_extension.models.custom_data_object import (
+    ColumnDataType,
+    CustomDatasetDefinition,
+    CustomFieldDefinition,
+    CustomFieldType,
+)
+
 # -------- Provisioning --------
 from .provisioning.entities.user_data_filters.models.udf_models import (
     UserDataFilterFullLoad,
@@ -51,6 +62,8 @@ __all__ = [
     "UserIncrementalLoad",
     "UserGroupIncrementalLoad",
     "PermissionFullLoad",
+    "LocalStorageConfig",
+    "S3StorageConfig",
     "PermissionIncrementalLoad",
     "UserFullLoad",
     "UserGroupFullLoad",
@@ -61,5 +74,10 @@ __all__ = [
     "UserDataFilterProvisioner",
     "UserDataFilterFullLoad",
     "EntityType",
+    "LdmExtensionManager",
+    "CustomDatasetDefinition",
+    "CustomFieldDefinition",
+    "ColumnDataType",
+    "CustomFieldType",
     "__version__",
 ]
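All of the newly exported names are importable from the package root. A minimal import check against the `__all__` additions above (assuming the 1.50.1.dev2 wheel is installed):

from gooddata_pipelines import (
    ColumnDataType,
    CustomDatasetDefinition,
    CustomFieldDefinition,
    CustomFieldType,
    LdmExtensionManager,
    LocalStorageConfig,
    S3StorageConfig,
)

# The LDM extension manager and both storage configs now sit in the public API
# surface alongside the existing backup/restore and provisioning exports.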
gooddata_pipelines/api/gooddata_api.py
CHANGED
@@ -174,6 +174,49 @@ class ApiMethods:
         )
         return self._get(endpoint)

+    def get_all_metrics(self, workspace_id: str) -> requests.Response:
+        """Get all metrics from the specified workspace.
+
+        Args:
+            workspace_id (str): The ID of the workspace to retrieve metrics from.
+        Returns:
+            requests.Response: The response containing the metrics.
+        """
+        endpoint = f"/entities/workspaces/{workspace_id}/metrics"
+        headers = {**self.headers, "X-GDC-VALIDATE-RELATIONS": "true"}
+        return self._get(endpoint, headers=headers)
+
+    def get_all_visualization_objects(
+        self, workspace_id: str
+    ) -> requests.Response:
+        """Get all visualizations from the specified workspace.
+
+        Args:
+            workspace_id (str): The ID of the workspace to retrieve visualizations from.
+        Returns:
+            requests.Response: The response containing the visualizations.
+        """
+        endpoint = f"/entities/workspaces/{workspace_id}/visualizationObjects"
+        headers = {**self.headers, "X-GDC-VALIDATE-RELATIONS": "true"}
+        return self._get(endpoint, headers=headers)
+
+    def get_all_dashboards(self, workspace_id: str) -> requests.Response:
+        """Get all dashboards from the specified workspace.
+
+        Args:
+            workspace_id (str): The ID of the workspace to retrieve dashboards from.
+        Returns:
+            requests.Response: The response containing the dashboards.
+        """
+        endpoint = f"/entities/workspaces/{workspace_id}/analyticalDashboards"
+        headers = {**self.headers, "X-GDC-VALIDATE-RELATIONS": "true"}
+        return self._get(endpoint, headers=headers)
+
+    def get_profile(self) -> requests.Response:
+        """Returns organization and current user information."""
+        endpoint = "/profile"
+        return self._get(endpoint)
+
     def _get(
         self, endpoint: str, headers: dict[str, str] | None = None
     ) -> requests.Response:
@@ -253,3 +296,15 @@ class ApiMethods:
         url = self._get_url(endpoint)

         return requests.delete(url, headers=self.headers, timeout=TIMEOUT)
+
+    @staticmethod
+    def raise_if_response_not_ok(*responses: requests.Response) -> None:
+        """Check if responses from API calls are OK.
+
+        Raises ValueError if any response is not OK (status code not 2xx).
+        """
+        for response in responses:
+            if not response.ok:
+                raise ValueError(
+                    f"Request to {response.url} failed with status code {response.status_code}: {response.text}"
+                )
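The new read-only endpoints send the `X-GDC-VALIDATE-RELATIONS: true` header and return raw `requests.Response` objects, leaving status handling to the caller; `raise_if_response_not_ok` covers that in a single call. A hedged usage sketch, where `api` is assumed to be an already initialized client exposing the `ApiMethods` above and `collect_analytics` is a hypothetical helper:

import requests


def collect_analytics(api, workspace_id: str) -> dict[str, requests.Response]:
    """Fetch metrics, visualizations and dashboards, failing fast on any non-2xx response.

    `api` is assumed to be an initialized client exposing the ApiMethods shown above.
    """
    responses = {
        "metrics": api.get_all_metrics(workspace_id),
        "visualizations": api.get_all_visualization_objects(workspace_id),
        "dashboards": api.get_all_dashboards(workspace_id),
    }
    # Raises ValueError with the failing URL, status code and body if any call was not 2xx.
    api.raise_if_response_not_ok(*responses.values())
    return responses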
gooddata_pipelines/backup_and_restore/backup_manager.py
CHANGED
@@ -4,10 +4,8 @@ import json
 import os
 import shutil
 import tempfile
-import threading
 import time
 import traceback
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Type
@@ -39,6 +37,7 @@ from gooddata_pipelines.backup_and_restore.storage.s3_storage import (
     S3Storage,
 )
 from gooddata_pipelines.logger import LogObserver
+from gooddata_pipelines.utils.rate_limiter import RateLimiter


 @dataclass
@@ -60,6 +59,10 @@ class BackupManager:

         self.loader = BackupInputProcessor(self._api, self.config.api_page_size)

+        self._api_rate_limiter = RateLimiter(
+            calls_per_second=self.config.api_calls_per_second,
+        )
+
     @classmethod
     def create(
         cls: Type["BackupManager"],
@@ -95,11 +98,12 @@ class BackupManager:

     def get_user_data_filters(self, ws_id: str) -> dict:
         """Returns the user data filters for the specified workspace."""
-
-
-
-
-
+        with self._api_rate_limiter:
+            response: requests.Response = self._api.get_user_data_filters(ws_id)
+        if response.ok:
+            return response.json()
+        else:
+            raise RuntimeError(f"{response.status_code}: {response.text}")

     def _store_user_data_filters(
         self,
@@ -144,14 +148,17 @@ class BackupManager:

     def _get_automations_from_api(self, workspace_id: str) -> Any:
         """Returns automations for the workspace as JSON."""
-
-
-
-        else:
-            raise RuntimeError(
-                f"Failed to get automations for {workspace_id}. "
-                + f"{response.status_code}: {response.text}"
+        with self._api_rate_limiter:
+            response: requests.Response = self._api.get_automations(
+                workspace_id
             )
+        if response.ok:
+            return response.json()
+        else:
+            raise RuntimeError(
+                f"Failed to get automations for {workspace_id}. "
+                + f"{response.status_code}: {response.text}"
+            )

     def _store_automations(self, export_path: Path, workspace_id: str) -> None:
         """Stores the automations in the specified export path."""
@@ -183,7 +190,8 @@ class BackupManager:
     ) -> None:
         """Stores the filter views in the specified export path."""
         # Get the filter views YAML files from the API
-        self.
+        with self._api_rate_limiter:
+            self._api.store_declarative_filter_views(workspace_id, export_path)

         # Move filter views to the subfolder containing the analytics model
         self._move_folder(
@@ -231,7 +239,10 @@ class BackupManager:
         # the SDK. That way we could save and package all the declarations
         # directly instead of reorganizing the folder structures. That should
         # be more transparent/readable and possibly safer for threading
-        self.
+        with self._api_rate_limiter:
+            self._api.store_declarative_workspace(
+                workspace_id, export_path
+            )
         self.store_declarative_filter_views(export_path, workspace_id)
         self._store_automations(export_path, workspace_id)

@@ -291,7 +302,6 @@ class BackupManager:
     def _process_batch(
         self,
         batch: BackupBatch,
-        stop_event: threading.Event,
         retry_count: int = 0,
     ) -> None:
         """Processes a single batch of workspaces for backup.
@@ -299,10 +309,6 @@ class BackupManager:
         and retry with exponential backoff up to BackupSettings.MAX_RETRIES.
         The base wait time is defined by BackupSettings.RETRY_DELAY.
         """
-        if stop_event.is_set():
-            # If the stop_event flag is set, return. This will terminate the thread
-            return
-
         try:
             with tempfile.TemporaryDirectory() as tmpdir:
                 self._get_workspace_export(tmpdir, batch.list_of_ids)
@@ -314,10 +320,7 @@ class BackupManager:
                 self.storage.export(tmpdir, self.org_id)

         except Exception as e:
-            if
-                return
-
-            elif retry_count < BackupSettings.MAX_RETRIES:
+            if retry_count < BackupSettings.MAX_RETRIES:
                 # Retry with exponential backoff until MAX_RETRIES
                 next_retry = retry_count + 1
                 wait_time = BackupSettings.RETRY_DELAY**next_retry
@@ -328,52 +331,23 @@ class BackupManager:
                 )

                 time.sleep(wait_time)
-                self._process_batch(batch,
+                self._process_batch(batch, next_retry)
             else:
                 # If the batch fails after MAX_RETRIES, raise the error
                 self.logger.error(f"Batch failed: {e.__class__.__name__}: {e}")
                 raise

-    def
+    def _process_batches(
         self,
         batches: list[BackupBatch],
     ) -> None:
         """
-        Processes batches
-
+        Processes batches sequentially to avoid overloading the API.
+        If any batch fails, the processing will stop.
         """
-
-
-
-
-        with ThreadPoolExecutor(
-            max_workers=self.config.max_workers
-        ) as executor:
-            # Set the futures tasks.
-            futures = []
-            for batch in batches:
-                futures.append(
-                    executor.submit(
-                        self._process_batch,
-                        batch,
-                        stop_event,
-                    )
-                )
-
-            # Process futures as they complete
-            for future in as_completed(futures):
-                try:
-                    future.result()
-                except Exception:
-                    # On failure, set the flag to True - signal running processes to stop
-                    stop_event.set()
-
-                    # Cancel unstarted threads
-                    for f in futures:
-                        if not f.done():
-                            f.cancel()
-
-                    raise
+        for i, batch in enumerate(batches, 1):
+            self.logger.info(f"Processing batch {i}/{len(batches)}...")
+            self._process_batch(batch)

     def backup_workspaces(
         self,
@@ -440,7 +414,7 @@ class BackupManager:
                 f"Exporting {len(workspaces_to_export)} workspaces in {len(batches)} batches."
             )

-            self.
+            self._process_batches(batches)

             self.logger.info("Backup completed")
         except Exception as e:
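Every call site above now acquires `self._api_rate_limiter` before touching the API, and batches run sequentially instead of through a thread pool. The new `gooddata_pipelines/utils/rate_limiter.py` itself is not shown in this diff, so the following is only an illustrative sketch of a `calls_per_second` context-manager limiter compatible with those call sites, not the shipped implementation:

import threading
import time


class RateLimiter:
    """Illustrative sketch: ensure calls start at most `calls_per_second` per second."""

    def __init__(self, calls_per_second: float) -> None:
        self._min_interval = 1.0 / calls_per_second
        self._lock = threading.Lock()
        self._last_call = 0.0

    def __enter__(self) -> "RateLimiter":
        # Sleep long enough that consecutive entries are at least
        # 1 / calls_per_second seconds apart, then record the start time.
        with self._lock:
            wait = self._min_interval - (time.monotonic() - self._last_call)
            if wait > 0:
                time.sleep(wait)
            self._last_call = time.monotonic()
        return self

    def __exit__(self, *exc_info: object) -> None:
        # Nothing to release; the delay happens on entry.
        return None

Delaying on entry keeps the interval between consecutive API calls at or above 1 / calls_per_second even if such a limiter were shared across threads.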
gooddata_pipelines/backup_and_restore/constants.py
CHANGED
@@ -21,19 +21,15 @@ class DirNames:
     UDF = "user_data_filters"


-@dataclass(frozen=True)
-class ConcurrencyDefaults:
-    MAX_WORKERS = 1
-    DEFAULT_BATCH_SIZE = 100
-
-
 @dataclass(frozen=True)
 class ApiDefaults:
     DEFAULT_PAGE_SIZE = 100
+    DEFAULT_BATCH_SIZE = 100
+    DEFAULT_API_CALLS_PER_SECOND = 1.0


 @dataclass(frozen=True)
-class BackupSettings(
+class BackupSettings(ApiDefaults):
     MAX_RETRIES = 3
     RETRY_DELAY = 5  # seconds
     TIMESTAMP_SDK_FOLDER = (
gooddata_pipelines/backup_and_restore/models/storage.py
CHANGED
@@ -83,14 +83,13 @@ class BackupRestoreConfig(BaseModel):
             description="Batch size must be greater than 0",
         ),
     ] = Field(default=BackupSettings.DEFAULT_BATCH_SIZE)
-
-
+    api_calls_per_second: Annotated[
+        float,
         Field(
             gt=0,
-
-            description="Max workers must be greater than 0 and less than 3",
+            description="Maximum API calls per second (rate limiting)",
         ),
-    ] = Field(default=BackupSettings.
+    ] = Field(default=BackupSettings.DEFAULT_API_CALLS_PER_SECOND)

     @classmethod
     def from_yaml(cls, conf_path: str) -> "BackupRestoreConfig":
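The old max-workers knob is gone from the configuration; the setting that remains is `api_calls_per_second`, validated as a positive float and defaulting to `DEFAULT_API_CALLS_PER_SECOND` (1.0). A short, hedged loading sketch, where `backup_config.yaml` is a placeholder path and any other required keys in that file are assumed to be unchanged from 1.50.0:

from gooddata_pipelines import BackupRestoreConfig

# from_yaml is the existing loader classmethod shown above; the new
# api_calls_per_second key is optional and falls back to 1.0 when omitted.
config = BackupRestoreConfig.from_yaml("backup_config.yaml")
print(config.api_calls_per_second)  # 1.0 unless overridden in the YAML file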
gooddata_pipelines/ldm_extension/__init__.py
ADDED
@@ -0,0 +1 @@
+# (C) 2025 GoodData Corporation
gooddata_pipelines/ldm_extension/input_processor.py
ADDED
@@ -0,0 +1,286 @@
+# (C) 2025 GoodData Corporation
+"""Module for processing validated custom datasets and fields data.
+
+This module is responsible for converting validated custom datasets and fields
+into objects defined in the GoodData Python SDK.
+"""
+
+from gooddata_sdk.catalog.identifier import (
+    CatalogDatasetWorkspaceDataFilterIdentifier,
+    CatalogGrainIdentifier,
+    CatalogReferenceIdentifier,
+)
+from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.data_filter_references import (
+    CatalogDeclarativeWorkspaceDataFilterReferences,
+)
+from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.dataset.dataset import (
+    CatalogDataSourceTableIdentifier,
+    CatalogDeclarativeAttribute,
+    CatalogDeclarativeDataset,
+    CatalogDeclarativeDatasetSql,
+    CatalogDeclarativeFact,
+    CatalogDeclarativeReference,
+    CatalogDeclarativeReferenceSource,
+    CatalogDeclarativeWorkspaceDataFilterColumn,
+)
+from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.date_dataset.date_dataset import (
+    CatalogDeclarativeDateDataset,
+    CatalogGranularitiesFormatting,
+)
+from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import (
+    CatalogDeclarativeLdm,
+    CatalogDeclarativeModel,
+)
+
+from gooddata_pipelines.ldm_extension.models.aliases import DatasetId
+from gooddata_pipelines.ldm_extension.models.custom_data_object import (
+    ColumnDataType,
+    CustomDataset,
+    CustomFieldDefinition,
+    CustomFieldType,
+)
+
+
+class LdmExtensionDataProcessor:
+    """Create GoodData LDM from validated custom datasets and fields."""
+
+    DATE_GRANULARITIES: list[str] = [
+        "MINUTE",
+        "HOUR",
+        "DAY",
+        "WEEK",
+        "MONTH",
+        "QUARTER",
+        "YEAR",
+        "MINUTE_OF_HOUR",
+        "HOUR_OF_DAY",
+        "DAY_OF_WEEK",
+        "DAY_OF_MONTH",
+        "DAY_OF_YEAR",
+        "WEEK_OF_YEAR",
+        "MONTH_OF_YEAR",
+        "QUARTER_OF_YEAR",
+    ]
+
+    @staticmethod
+    def _attribute_from_field(
+        dataset_name: str,
+        custom_field: CustomFieldDefinition,
+    ) -> CatalogDeclarativeAttribute:
+        """Assign a declarative attribute from a custom field definition."""
+        return CatalogDeclarativeAttribute(
+            id=custom_field.custom_field_id,
+            title=custom_field.custom_field_name,
+            source_column=custom_field.custom_field_source_column,
+            labels=[],
+            source_column_data_type=custom_field.custom_field_source_column_data_type.value,
+            tags=[dataset_name],
+        )
+
+    @staticmethod
+    def _fact_from_field(
+        dataset_name: str,
+        custom_field: CustomFieldDefinition,
+    ) -> CatalogDeclarativeFact:
+        """Assign a declarative fact from a custom field definition."""
+        return CatalogDeclarativeFact(
+            id=custom_field.custom_field_id,
+            title=custom_field.custom_field_name,
+            source_column=custom_field.custom_field_source_column,
+            source_column_data_type=custom_field.custom_field_source_column_data_type.value,
+            tags=[dataset_name],
+        )
+
+    def _date_from_field(
+        self,
+        dataset_name: str,
+        custom_field: CustomFieldDefinition,
+    ) -> CatalogDeclarativeDateDataset:
+        """Assign a declarative date dataset from a custom field definition."""
+
+        return CatalogDeclarativeDateDataset(
+            id=custom_field.custom_field_id,
+            title=custom_field.custom_field_name,
+            granularities_formatting=CatalogGranularitiesFormatting(
+                title_base="",
+                title_pattern="%titleBase - %granularityTitle",
+            ),
+            granularities=self.DATE_GRANULARITIES,
+            tags=[dataset_name],
+        )
+
+    @staticmethod
+    def _date_ref_from_field(
+        custom_field: CustomFieldDefinition,
+    ) -> CatalogDeclarativeReference:
+        """Create a date reference from a custom field definition."""
+        return CatalogDeclarativeReference(
+            identifier=CatalogReferenceIdentifier(
+                id=custom_field.custom_field_id
+            ),
+            multivalue=False,
+            sources=[
+                CatalogDeclarativeReferenceSource(
+                    column=custom_field.custom_field_source_column,
+                    target=CatalogGrainIdentifier(
+                        id=custom_field.custom_field_id,
+                        type=CustomFieldType.DATE.value,
+                    ),
+                    data_type=custom_field.custom_field_source_column_data_type.value,
+                )
+            ],
+        )
+
+    @staticmethod
+    def _get_sources(
+        dataset: CustomDataset,
+    ) -> tuple[
+        CatalogDataSourceTableIdentifier | None,
+        CatalogDeclarativeDatasetSql | None,
+    ]:
+        """Get the data source table and SQL from the dataset definition."""
+        # We will have either a table id or a sql statement. Let's store
+        # whatever data is available to variables and pass it to the
+        # dataset. Both can be object instances or None, but at least one
+        # should be valid as per prior validation.
+        dataset_source_table_id = (
+            CatalogDataSourceTableIdentifier(
+                id=dataset.definition.dataset_source_table,
+                data_source_id=dataset.definition.dataset_datasource_id,
+                path=[dataset.definition.dataset_source_table],
+            )
+            if dataset.definition.dataset_source_table
+            else None
+        )
+
+        dataset_sql = (
+            CatalogDeclarativeDatasetSql(
+                statement=dataset.definition.dataset_source_sql,
+                data_source_id=dataset.definition.dataset_datasource_id,
+            )
+            if dataset.definition.dataset_source_sql
+            else None
+        )
+        return dataset_source_table_id, dataset_sql
+
+    def datasets_to_ldm(
+        self, datasets: dict[DatasetId, CustomDataset]
+    ) -> CatalogDeclarativeModel:
+        """Convert validated datasets to GoodData declarative model.
+
+        Args:
+            datasets (dict[DatasetId, CustomDataset]): Dictionary of validated
+                datasets.
+        Returns:
+            CatalogDeclarativeModel: GoodData declarative model representation
+                of the datasets.
+        """
+
+        declarative_datasets: list[CatalogDeclarativeDataset] = []
+
+        # Date dimensions are not stored in a dataset, but as a separate datasets
+        # in `date_instances` object on the LDM
+        date_instances: list[CatalogDeclarativeDateDataset] = []
+
+        for dataset in datasets.values():
+            date_references: list[CatalogDeclarativeReference] = []
+            attributes: list[CatalogDeclarativeAttribute] = []
+            facts: list[CatalogDeclarativeFact] = []
+
+            # Iterate through the custom fields and create the appropriate objects
+            for custom_field in dataset.custom_fields:
+                if custom_field.custom_field_type == CustomFieldType.ATTRIBUTE:
+                    attributes.append(
+                        self._attribute_from_field(
+                            dataset.definition.dataset_name, custom_field
+                        )
+                    )
+
+                elif custom_field.custom_field_type == CustomFieldType.FACT:
+                    facts.append(
+                        self._fact_from_field(
+                            dataset.definition.dataset_name, custom_field
+                        )
+                    )
+
+                # Process date dimensions and store them to date_instances. Date
+                # dimensions are not stored in a dataset, but as a separate dataset.
+                # However, they need to be referenced in the dataset references to
+                # create the connection between the dataset and the date dimension
+                # in the GoodData Logical Data Model.
+                elif custom_field.custom_field_type == CustomFieldType.DATE:
+                    # Add the date dimension to the date_instances
+                    date_instances.append(
+                        self._date_from_field(
+                            dataset.definition.dataset_name, custom_field
+                        )
+                    )
+
+                    # Create a reference so that the date dimension is connected
+                    # to the dataset in the GoodData Logical Data Model.
+                    date_references.append(
+                        self._date_ref_from_field(custom_field)
+                    )
+
+                else:
+                    raise ValueError(
+                        f"Unsupported custom field type: {custom_field.custom_field_type}"
+                    )
+
+            # Get the data source info
+            dataset_source_table_id, dataset_sql = self._get_sources(dataset)
+
+            # Construct the declarative dataset object and append it to the list.
+            declarative_datasets.append(
+                CatalogDeclarativeDataset(
+                    id=dataset.definition.dataset_id,
+                    title=dataset.definition.dataset_name,
+                    grain=[],
+                    references=[
+                        CatalogDeclarativeReference(
+                            identifier=CatalogReferenceIdentifier(
+                                id=dataset.definition.parent_dataset_reference,
+                            ),
+                            multivalue=True,
+                            sources=[
+                                CatalogDeclarativeReferenceSource(
+                                    column=dataset.definition.dataset_reference_source_column,
+                                    data_type=dataset.definition.dataset_reference_source_column_data_type.value,
+                                    target=CatalogGrainIdentifier(
+                                        id=dataset.definition.parent_dataset_reference_attribute_id,
+                                        type=CustomFieldType.ATTRIBUTE.value,
+                                    ),
+                                )
+                            ],
+                        ),
+                    ]
+                    + date_references,
+                    description=None,
+                    attributes=attributes,
+                    facts=facts,
+                    data_source_table_id=dataset_source_table_id,
+                    sql=dataset_sql,
+                    workspace_data_filter_columns=[
+                        CatalogDeclarativeWorkspaceDataFilterColumn(
+                            name=dataset.definition.workspace_data_filter_column_name,
+                            data_type=ColumnDataType.STRING.value,
+                        )
+                    ],
+                    workspace_data_filter_references=[
+                        CatalogDeclarativeWorkspaceDataFilterReferences(
+                            filter_id=CatalogDatasetWorkspaceDataFilterIdentifier(
+                                id=dataset.definition.workspace_data_filter_id
+                            ),
+                            filter_column=dataset.definition.workspace_data_filter_column_name,
+                            filter_column_data_type=ColumnDataType.STRING.value,
+                        )
+                    ],
+                    tags=[dataset.definition.dataset_name],
+                )
+            )
+
+        # Create the Logical Data Model from the datasets and the date instances.
+        ldm = CatalogDeclarativeLdm(
+            datasets=declarative_datasets, date_instances=date_instances
+        )
+        return CatalogDeclarativeModel(ldm=ldm)