gooddata-pipelines 1.50.0__py3-none-any.whl → 1.50.1.dev2__py3-none-any.whl

@@ -6,11 +6,22 @@ from ._version import __version__
  from .backup_and_restore.backup_manager import BackupManager
  from .backup_and_restore.models.storage import (
      BackupRestoreConfig,
+     LocalStorageConfig,
+     S3StorageConfig,
      StorageType,
  )
  from .backup_and_restore.storage.local_storage import LocalStorage
  from .backup_and_restore.storage.s3_storage import S3Storage

+ # -------- LDM Extension --------
+ from .ldm_extension.ldm_extension_manager import LdmExtensionManager
+ from .ldm_extension.models.custom_data_object import (
+     ColumnDataType,
+     CustomDatasetDefinition,
+     CustomFieldDefinition,
+     CustomFieldType,
+ )
+
  # -------- Provisioning --------
  from .provisioning.entities.user_data_filters.models.udf_models import (
      UserDataFilterFullLoad,
@@ -51,6 +62,8 @@ __all__ = [
      "UserIncrementalLoad",
      "UserGroupIncrementalLoad",
      "PermissionFullLoad",
+     "LocalStorageConfig",
+     "S3StorageConfig",
      "PermissionIncrementalLoad",
      "UserFullLoad",
      "UserGroupFullLoad",
@@ -61,5 +74,10 @@ __all__ = [
      "UserDataFilterProvisioner",
      "UserDataFilterFullLoad",
      "EntityType",
+     "LdmExtensionManager",
+     "CustomDatasetDefinition",
+     "CustomFieldDefinition",
+     "ColumnDataType",
+     "CustomFieldType",
      "__version__",
  ]
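
The newly exported names can be imported straight from the package root. The snippet below is an illustrative sketch based only on the `__all__` entries added above:

    # Illustrative import of the names newly exported in 1.50.1.dev2.
    from gooddata_pipelines import (
        ColumnDataType,
        CustomDatasetDefinition,
        CustomFieldDefinition,
        CustomFieldType,
        LdmExtensionManager,
        LocalStorageConfig,
        S3StorageConfig,
    )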
@@ -174,6 +174,49 @@ class ApiMethods:
          )
          return self._get(endpoint)

+     def get_all_metrics(self, workspace_id: str) -> requests.Response:
+         """Get all metrics from the specified workspace.
+
+         Args:
+             workspace_id (str): The ID of the workspace to retrieve metrics from.
+         Returns:
+             requests.Response: The response containing the metrics.
+         """
+         endpoint = f"/entities/workspaces/{workspace_id}/metrics"
+         headers = {**self.headers, "X-GDC-VALIDATE-RELATIONS": "true"}
+         return self._get(endpoint, headers=headers)
+
+     def get_all_visualization_objects(
+         self, workspace_id: str
+     ) -> requests.Response:
+         """Get all visualizations from the specified workspace.
+
+         Args:
+             workspace_id (str): The ID of the workspace to retrieve visualizations from.
+         Returns:
+             requests.Response: The response containing the visualizations.
+         """
+         endpoint = f"/entities/workspaces/{workspace_id}/visualizationObjects"
+         headers = {**self.headers, "X-GDC-VALIDATE-RELATIONS": "true"}
+         return self._get(endpoint, headers=headers)
+
+     def get_all_dashboards(self, workspace_id: str) -> requests.Response:
+         """Get all dashboards from the specified workspace.
+
+         Args:
+             workspace_id (str): The ID of the workspace to retrieve dashboards from.
+         Returns:
+             requests.Response: The response containing the dashboards.
+         """
+         endpoint = f"/entities/workspaces/{workspace_id}/analyticalDashboards"
+         headers = {**self.headers, "X-GDC-VALIDATE-RELATIONS": "true"}
+         return self._get(endpoint, headers=headers)
+
+     def get_profile(self) -> requests.Response:
+         """Returns organization and current user information."""
+         endpoint = "/profile"
+         return self._get(endpoint)
+
      def _get(
          self, endpoint: str, headers: dict[str, str] | None = None
      ) -> requests.Response:
@@ -253,3 +296,15 @@ class ApiMethods:
          url = self._get_url(endpoint)

          return requests.delete(url, headers=self.headers, timeout=TIMEOUT)
+
+     @staticmethod
+     def raise_if_response_not_ok(*responses: requests.Response) -> None:
+         """Check if responses from API calls are OK.
+
+         Raises ValueError if any response is not OK (status code not 2xx).
+         """
+         for response in responses:
+             if not response.ok:
+                 raise ValueError(
+                     f"Request to {response.url} failed with status code {response.status_code}: {response.text}"
+                 )
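
A hedged usage sketch of the new read helpers and the response check added above. Here `api` stands for an already-configured ApiMethods-backed client and `workspace_id` for an existing workspace identifier, neither of which appears in this diff:

    # Sketch only: fetch entities with relation validation enabled, then fail
    # fast if any call did not return a 2xx status.
    metrics = api.get_all_metrics(workspace_id)
    visualizations = api.get_all_visualization_objects(workspace_id)
    dashboards = api.get_all_dashboards(workspace_id)

    api.raise_if_response_not_ok(metrics, visualizations, dashboards)

    profile = api.get_profile().json()  # organization and current user info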
@@ -4,10 +4,8 @@ import json
  import os
  import shutil
  import tempfile
- import threading
  import time
  import traceback
- from concurrent.futures import ThreadPoolExecutor, as_completed
  from dataclasses import dataclass
  from pathlib import Path
  from typing import Any, Type
@@ -39,6 +37,7 @@ from gooddata_pipelines.backup_and_restore.storage.s3_storage import (
      S3Storage,
  )
  from gooddata_pipelines.logger import LogObserver
+ from gooddata_pipelines.utils.rate_limiter import RateLimiter


  @dataclass
@@ -60,6 +59,10 @@ class BackupManager:

          self.loader = BackupInputProcessor(self._api, self.config.api_page_size)

+         self._api_rate_limiter = RateLimiter(
+             calls_per_second=self.config.api_calls_per_second,
+         )
+
      @classmethod
      def create(
          cls: Type["BackupManager"],
@@ -95,11 +98,12 @@ class BackupManager:

      def get_user_data_filters(self, ws_id: str) -> dict:
          """Returns the user data filters for the specified workspace."""
-         response: requests.Response = self._api.get_user_data_filters(ws_id)
-         if response.ok:
-             return response.json()
-         else:
-             raise RuntimeError(f"{response.status_code}: {response.text}")
+         with self._api_rate_limiter:
+             response: requests.Response = self._api.get_user_data_filters(ws_id)
+             if response.ok:
+                 return response.json()
+             else:
+                 raise RuntimeError(f"{response.status_code}: {response.text}")

      def _store_user_data_filters(
          self,
@@ -144,14 +148,17 @@ class BackupManager:

      def _get_automations_from_api(self, workspace_id: str) -> Any:
          """Returns automations for the workspace as JSON."""
-         response: requests.Response = self._api.get_automations(workspace_id)
-         if response.ok:
-             return response.json()
-         else:
-             raise RuntimeError(
-                 f"Failed to get automations for {workspace_id}. "
-                 + f"{response.status_code}: {response.text}"
+         with self._api_rate_limiter:
+             response: requests.Response = self._api.get_automations(
+                 workspace_id
              )
+             if response.ok:
+                 return response.json()
+             else:
+                 raise RuntimeError(
+                     f"Failed to get automations for {workspace_id}. "
+                     + f"{response.status_code}: {response.text}"
+                 )

      def _store_automations(self, export_path: Path, workspace_id: str) -> None:
          """Stores the automations in the specified export path."""
@@ -183,7 +190,8 @@ class BackupManager:
      ) -> None:
          """Stores the filter views in the specified export path."""
          # Get the filter views YAML files from the API
-         self._api.store_declarative_filter_views(workspace_id, export_path)
+         with self._api_rate_limiter:
+             self._api.store_declarative_filter_views(workspace_id, export_path)

          # Move filter views to the subfolder containing the analytics model
          self._move_folder(
@@ -231,7 +239,10 @@ class BackupManager:
              # the SDK. That way we could save and package all the declarations
              # directly instead of reorganizing the folder structures. That should
              # be more transparent/readable and possibly safer for threading
-             self._api.store_declarative_workspace(workspace_id, export_path)
+             with self._api_rate_limiter:
+                 self._api.store_declarative_workspace(
+                     workspace_id, export_path
+                 )
              self.store_declarative_filter_views(export_path, workspace_id)
              self._store_automations(export_path, workspace_id)

@@ -291,7 +302,6 @@ class BackupManager:
      def _process_batch(
          self,
          batch: BackupBatch,
-         stop_event: threading.Event,
          retry_count: int = 0,
      ) -> None:
          """Processes a single batch of workspaces for backup.
@@ -299,10 +309,6 @@ class BackupManager:
          and retry with exponential backoff up to BackupSettings.MAX_RETRIES.
          The base wait time is defined by BackupSettings.RETRY_DELAY.
          """
-         if stop_event.is_set():
-             # If the stop_event flag is set, return. This will terminate the thread
-             return
-
          try:
              with tempfile.TemporaryDirectory() as tmpdir:
                  self._get_workspace_export(tmpdir, batch.list_of_ids)
@@ -314,10 +320,7 @@ class BackupManager:
                  self.storage.export(tmpdir, self.org_id)

          except Exception as e:
-             if stop_event.is_set():
-                 return
-
-             elif retry_count < BackupSettings.MAX_RETRIES:
+             if retry_count < BackupSettings.MAX_RETRIES:
                  # Retry with exponential backoff until MAX_RETRIES
                  next_retry = retry_count + 1
                  wait_time = BackupSettings.RETRY_DELAY**next_retry
@@ -328,52 +331,23 @@ class BackupManager:
                  )

                  time.sleep(wait_time)
-                 self._process_batch(batch, stop_event, next_retry)
+                 self._process_batch(batch, next_retry)
              else:
                  # If the batch fails after MAX_RETRIES, raise the error
                  self.logger.error(f"Batch failed: {e.__class__.__name__}: {e}")
                  raise

-     def _process_batches_in_parallel(
+     def _process_batches(
          self,
          batches: list[BackupBatch],
      ) -> None:
          """
-         Processes batches in parallel using concurrent.futures. Will stop the processing
-         if any one of the batches fails.
+         Processes batches sequentially to avoid overloading the API.
+         If any batch fails, the processing will stop.
          """
-
-         # Create a threading flag to control the threads that have already been started
-         stop_event = threading.Event()
-
-         with ThreadPoolExecutor(
-             max_workers=self.config.max_workers
-         ) as executor:
-             # Set the futures tasks.
-             futures = []
-             for batch in batches:
-                 futures.append(
-                     executor.submit(
-                         self._process_batch,
-                         batch,
-                         stop_event,
-                     )
-                 )
-
-             # Process futures as they complete
-             for future in as_completed(futures):
-                 try:
-                     future.result()
-                 except Exception:
-                     # On failure, set the flag to True - signal running processes to stop
-                     stop_event.set()
-
-                     # Cancel unstarted threads
-                     for f in futures:
-                         if not f.done():
-                             f.cancel()
-
-                     raise
+         for i, batch in enumerate(batches, 1):
+             self.logger.info(f"Processing batch {i}/{len(batches)}...")
+             self._process_batch(batch)

      def backup_workspaces(
          self,
@@ -440,7 +414,7 @@ class BackupManager:
                  f"Exporting {len(workspaces_to_export)} workspaces in {len(batches)} batches."
              )

-             self._process_batches_in_parallel(batches)
+             self._process_batches(batches)

              self.logger.info("Backup completed")
          except Exception as e:
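
The backup flow now throttles API calls through `gooddata_pipelines.utils.rate_limiter.RateLimiter`, whose implementation is not included in this diff. Below is a minimal sketch of a compatible limiter, assuming only the interface visible above (a `calls_per_second` keyword argument and use as a context manager):

    import threading
    import time


    class SimpleRateLimiter:
        """Sketch of a calls-per-second limiter usable as a context manager.

        This is NOT the shipped RateLimiter (its module is not part of this
        diff); it only mirrors the interface seen in BackupManager above.
        """

        def __init__(self, calls_per_second: float) -> None:
            self._min_interval = 1.0 / calls_per_second
            self._lock = threading.Lock()
            self._last_call = 0.0

        def __enter__(self) -> "SimpleRateLimiter":
            with self._lock:
                # Sleep just long enough to keep the configured pace.
                wait = self._min_interval - (time.monotonic() - self._last_call)
                if wait > 0:
                    time.sleep(wait)
                self._last_call = time.monotonic()
            return self

        def __exit__(self, *exc_info: object) -> None:
            # Nothing to release; exceptions propagate to the caller.
            return None

Since batches are now processed sequentially, such a limiter mainly spaces out consecutive calls; the lock would only matter if the limiter were shared across threads.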
@@ -21,19 +21,15 @@ class DirNames:
      UDF = "user_data_filters"


- @dataclass(frozen=True)
- class ConcurrencyDefaults:
-     MAX_WORKERS = 1
-     DEFAULT_BATCH_SIZE = 100
-
-
  @dataclass(frozen=True)
  class ApiDefaults:
      DEFAULT_PAGE_SIZE = 100
+     DEFAULT_BATCH_SIZE = 100
+     DEFAULT_API_CALLS_PER_SECOND = 1.0


  @dataclass(frozen=True)
- class BackupSettings(ConcurrencyDefaults, ApiDefaults):
+ class BackupSettings(ApiDefaults):
      MAX_RETRIES = 3
      RETRY_DELAY = 5  # seconds
      TIMESTAMP_SDK_FOLDER = (
@@ -83,14 +83,13 @@ class BackupRestoreConfig(BaseModel):
              description="Batch size must be greater than 0",
          ),
      ] = Field(default=BackupSettings.DEFAULT_BATCH_SIZE)
-     max_workers: Annotated[
-         int,
+     api_calls_per_second: Annotated[
+         float,
          Field(
              gt=0,
-             lt=3,
-             description="Max workers must be greater than 0 and less than 3",
+             description="Maximum API calls per second (rate limiting)",
          ),
-     ] = Field(default=BackupSettings.MAX_WORKERS)
+     ] = Field(default=BackupSettings.DEFAULT_API_CALLS_PER_SECOND)

      @classmethod
      def from_yaml(cls, conf_path: str) -> "BackupRestoreConfig":
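
The new `api_calls_per_second` field replaces the removed `max_workers` setting. A hedged sketch of reading it through the existing `from_yaml` constructor; the file name and its contents are assumptions, since the YAML schema is not part of this diff:

    # Sketch only: "backup_config.yaml" is a hypothetical file whose keys are
    # assumed to mirror the Pydantic field names shown above.
    from gooddata_pipelines import BackupRestoreConfig

    config = BackupRestoreConfig.from_yaml("backup_config.yaml")
    print(config.api_calls_per_second)  # defaults to 1.0 when not set in the file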
@@ -0,0 +1 @@
+ # (C) 2025 GoodData Corporation
@@ -0,0 +1,286 @@
+ # (C) 2025 GoodData Corporation
+ """Module for processing validated custom datasets and fields data.
+
+ This module is responsible for converting validated custom datasets and fields
+ into objects defined in the GoodData Python SDK.
+ """
+
+ from gooddata_sdk.catalog.identifier import (
+     CatalogDatasetWorkspaceDataFilterIdentifier,
+     CatalogGrainIdentifier,
+     CatalogReferenceIdentifier,
+ )
+ from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.data_filter_references import (
+     CatalogDeclarativeWorkspaceDataFilterReferences,
+ )
+ from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.dataset.dataset import (
+     CatalogDataSourceTableIdentifier,
+     CatalogDeclarativeAttribute,
+     CatalogDeclarativeDataset,
+     CatalogDeclarativeDatasetSql,
+     CatalogDeclarativeFact,
+     CatalogDeclarativeReference,
+     CatalogDeclarativeReferenceSource,
+     CatalogDeclarativeWorkspaceDataFilterColumn,
+ )
+ from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.date_dataset.date_dataset import (
+     CatalogDeclarativeDateDataset,
+     CatalogGranularitiesFormatting,
+ )
+ from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import (
+     CatalogDeclarativeLdm,
+     CatalogDeclarativeModel,
+ )
+
+ from gooddata_pipelines.ldm_extension.models.aliases import DatasetId
+ from gooddata_pipelines.ldm_extension.models.custom_data_object import (
+     ColumnDataType,
+     CustomDataset,
+     CustomFieldDefinition,
+     CustomFieldType,
+ )
+
+
+ class LdmExtensionDataProcessor:
+     """Create GoodData LDM from validated custom datasets and fields."""
+
+     DATE_GRANULARITIES: list[str] = [
+         "MINUTE",
+         "HOUR",
+         "DAY",
+         "WEEK",
+         "MONTH",
+         "QUARTER",
+         "YEAR",
+         "MINUTE_OF_HOUR",
+         "HOUR_OF_DAY",
+         "DAY_OF_WEEK",
+         "DAY_OF_MONTH",
+         "DAY_OF_YEAR",
+         "WEEK_OF_YEAR",
+         "MONTH_OF_YEAR",
+         "QUARTER_OF_YEAR",
+     ]
+
+     @staticmethod
+     def _attribute_from_field(
+         dataset_name: str,
+         custom_field: CustomFieldDefinition,
+     ) -> CatalogDeclarativeAttribute:
+         """Assign a declarative attribute from a custom field definition."""
+         return CatalogDeclarativeAttribute(
+             id=custom_field.custom_field_id,
+             title=custom_field.custom_field_name,
+             source_column=custom_field.custom_field_source_column,
+             labels=[],
+             source_column_data_type=custom_field.custom_field_source_column_data_type.value,
+             tags=[dataset_name],
+         )
+
+     @staticmethod
+     def _fact_from_field(
+         dataset_name: str,
+         custom_field: CustomFieldDefinition,
+     ) -> CatalogDeclarativeFact:
+         """Assign a declarative fact from a custom field definition."""
+         return CatalogDeclarativeFact(
+             id=custom_field.custom_field_id,
+             title=custom_field.custom_field_name,
+             source_column=custom_field.custom_field_source_column,
+             source_column_data_type=custom_field.custom_field_source_column_data_type.value,
+             tags=[dataset_name],
+         )
+
+     def _date_from_field(
+         self,
+         dataset_name: str,
+         custom_field: CustomFieldDefinition,
+     ) -> CatalogDeclarativeDateDataset:
+         """Assign a declarative date dataset from a custom field definition."""
+
+         return CatalogDeclarativeDateDataset(
+             id=custom_field.custom_field_id,
+             title=custom_field.custom_field_name,
+             granularities_formatting=CatalogGranularitiesFormatting(
+                 title_base="",
+                 title_pattern="%titleBase - %granularityTitle",
+             ),
+             granularities=self.DATE_GRANULARITIES,
+             tags=[dataset_name],
+         )
+
+     @staticmethod
+     def _date_ref_from_field(
+         custom_field: CustomFieldDefinition,
+     ) -> CatalogDeclarativeReference:
+         """Create a date reference from a custom field definition."""
+         return CatalogDeclarativeReference(
+             identifier=CatalogReferenceIdentifier(
+                 id=custom_field.custom_field_id
+             ),
+             multivalue=False,
+             sources=[
+                 CatalogDeclarativeReferenceSource(
+                     column=custom_field.custom_field_source_column,
+                     target=CatalogGrainIdentifier(
+                         id=custom_field.custom_field_id,
+                         type=CustomFieldType.DATE.value,
+                     ),
+                     data_type=custom_field.custom_field_source_column_data_type.value,
+                 )
+             ],
+         )
+
+     @staticmethod
+     def _get_sources(
+         dataset: CustomDataset,
+     ) -> tuple[
+         CatalogDataSourceTableIdentifier | None,
+         CatalogDeclarativeDatasetSql | None,
+     ]:
+         """Get the data source table and SQL from the dataset definition."""
+         # We will have either a table id or a sql statement. Let's store
+         # whatever data is available to variables and pass it to the
+         # dataset. Both can be object instances or None, but at least one
+         # should be valid as per prior validation.
+         dataset_source_table_id = (
+             CatalogDataSourceTableIdentifier(
+                 id=dataset.definition.dataset_source_table,
+                 data_source_id=dataset.definition.dataset_datasource_id,
+                 path=[dataset.definition.dataset_source_table],
+             )
+             if dataset.definition.dataset_source_table
+             else None
+         )
+
+         dataset_sql = (
+             CatalogDeclarativeDatasetSql(
+                 statement=dataset.definition.dataset_source_sql,
+                 data_source_id=dataset.definition.dataset_datasource_id,
+             )
+             if dataset.definition.dataset_source_sql
+             else None
+         )
+         return dataset_source_table_id, dataset_sql
+
+     def datasets_to_ldm(
+         self, datasets: dict[DatasetId, CustomDataset]
+     ) -> CatalogDeclarativeModel:
+         """Convert validated datasets to GoodData declarative model.
+
+         Args:
+             datasets (dict[DatasetId, CustomDataset]): Dictionary of validated
+                 datasets.
+         Returns:
+             CatalogDeclarativeModel: GoodData declarative model representation
+                 of the datasets.
+         """
+
+         declarative_datasets: list[CatalogDeclarativeDataset] = []
+
+         # Date dimensions are not stored in a dataset, but as a separate datasets
+         # in `date_instances` object on the LDM
+         date_instances: list[CatalogDeclarativeDateDataset] = []
+
+         for dataset in datasets.values():
+             date_references: list[CatalogDeclarativeReference] = []
+             attributes: list[CatalogDeclarativeAttribute] = []
+             facts: list[CatalogDeclarativeFact] = []
+
+             # Iterate through the custom fields and create the appropriate objects
+             for custom_field in dataset.custom_fields:
+                 if custom_field.custom_field_type == CustomFieldType.ATTRIBUTE:
+                     attributes.append(
+                         self._attribute_from_field(
+                             dataset.definition.dataset_name, custom_field
+                         )
+                     )
+
+                 elif custom_field.custom_field_type == CustomFieldType.FACT:
+                     facts.append(
+                         self._fact_from_field(
+                             dataset.definition.dataset_name, custom_field
+                         )
+                     )
+
+                 # Process date dimensions and store them to date_instances. Date
+                 # dimensions are not stored in a dataset, but as a separate dataset.
+                 # However, they need to be referenced in the dataset references to
+                 # create the connection between the dataset and the date dimension
+                 # in the GoodData Logical Data Model.
+                 elif custom_field.custom_field_type == CustomFieldType.DATE:
+                     # Add the date dimension to the date_instances
+                     date_instances.append(
+                         self._date_from_field(
+                             dataset.definition.dataset_name, custom_field
+                         )
+                     )
+
+                     # Create a reference so that the date dimension is connected
+                     # to the dataset in the GoodData Logical Data Model.
+                     date_references.append(
+                         self._date_ref_from_field(custom_field)
+                     )
+
+                 else:
+                     raise ValueError(
+                         f"Unsupported custom field type: {custom_field.custom_field_type}"
+                     )
+
+             # Get the data source info
+             dataset_source_table_id, dataset_sql = self._get_sources(dataset)
+
+             # Construct the declarative dataset object and append it to the list.
+             declarative_datasets.append(
+                 CatalogDeclarativeDataset(
+                     id=dataset.definition.dataset_id,
+                     title=dataset.definition.dataset_name,
+                     grain=[],
+                     references=[
+                         CatalogDeclarativeReference(
+                             identifier=CatalogReferenceIdentifier(
+                                 id=dataset.definition.parent_dataset_reference,
+                             ),
+                             multivalue=True,
+                             sources=[
+                                 CatalogDeclarativeReferenceSource(
+                                     column=dataset.definition.dataset_reference_source_column,
+                                     data_type=dataset.definition.dataset_reference_source_column_data_type.value,
+                                     target=CatalogGrainIdentifier(
+                                         id=dataset.definition.parent_dataset_reference_attribute_id,
+                                         type=CustomFieldType.ATTRIBUTE.value,
+                                     ),
+                                 )
+                             ],
+                         ),
+                     ]
+                     + date_references,
+                     description=None,
+                     attributes=attributes,
+                     facts=facts,
+                     data_source_table_id=dataset_source_table_id,
+                     sql=dataset_sql,
+                     workspace_data_filter_columns=[
+                         CatalogDeclarativeWorkspaceDataFilterColumn(
+                             name=dataset.definition.workspace_data_filter_column_name,
+                             data_type=ColumnDataType.STRING.value,
+                         )
+                     ],
+                     workspace_data_filter_references=[
+                         CatalogDeclarativeWorkspaceDataFilterReferences(
+                             filter_id=CatalogDatasetWorkspaceDataFilterIdentifier(
+                                 id=dataset.definition.workspace_data_filter_id
+                             ),
+                             filter_column=dataset.definition.workspace_data_filter_column_name,
+                             filter_column_data_type=ColumnDataType.STRING.value,
+                         )
+                     ],
+                     tags=[dataset.definition.dataset_name],
+                 )
+             )
+
+         # Create the Logical Data Model from the datasets and the date instances.
+         ldm = CatalogDeclarativeLdm(
+             datasets=declarative_datasets, date_instances=date_instances
+         )
+         return CatalogDeclarativeModel(ldm=ldm)
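
A hedged sketch of how the new processor might be used. The module path of the file above is not visible in this diff, and `validated_datasets` stands for a hypothetical `dict[DatasetId, CustomDataset]` produced by an upstream validation step:

    # Sketch only: LdmExtensionDataProcessor is the class added above.
    processor = LdmExtensionDataProcessor()
    declarative_model = processor.datasets_to_ldm(validated_datasets)

    # The resulting CatalogDeclarativeModel can then be pushed to a workspace,
    # e.g. via the GoodData Python SDK's declarative LDM APIs.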