acryl-datahub 1.0.0rc8__py3-none-any.whl → 1.0.0rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/METADATA +2445 -2445
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/RECORD +46 -42
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/ingestion/graph/client.py +15 -11
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/superset.py +158 -24
- datahub/metadata/_schema_classes.py +157 -14
- datahub/metadata/_urns/urn_defs.py +58 -58
- datahub/metadata/schema.avsc +23 -10
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_shared.py +88 -3
- datahub/sdk/container.py +7 -1
- datahub/sdk/dataset.py +7 -1
- datahub/sdk/{_entity.py → entity.py} +4 -0
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +7 -1
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
from dataclasses import dataclass, field
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
from functools import lru_cache
|
|
5
6
|
from typing import Any, Dict, Iterable, List, Optional
|
|
@@ -22,6 +23,7 @@ from datahub.emitter.mce_builder import (
|
|
|
22
23
|
make_dataset_urn,
|
|
23
24
|
make_dataset_urn_with_platform_instance,
|
|
24
25
|
make_domain_urn,
|
|
26
|
+
make_user_urn,
|
|
25
27
|
)
|
|
26
28
|
from datahub.emitter.mcp_builder import add_domain_to_entity_wu
|
|
27
29
|
from datahub.ingestion.api.common import PipelineContext
|
|
@@ -46,7 +48,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
46
48
|
StatefulIngestionSourceBase,
|
|
47
49
|
)
|
|
48
50
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
49
|
-
AuditStamp,
|
|
50
51
|
ChangeAuditStamps,
|
|
51
52
|
Status,
|
|
52
53
|
TimeStamp,
|
|
@@ -65,17 +66,22 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
65
66
|
SchemaMetadata,
|
|
66
67
|
)
|
|
67
68
|
from datahub.metadata.schema_classes import (
|
|
69
|
+
AuditStampClass,
|
|
68
70
|
ChartInfoClass,
|
|
69
71
|
ChartTypeClass,
|
|
70
72
|
DashboardInfoClass,
|
|
71
73
|
DatasetLineageTypeClass,
|
|
72
74
|
DatasetPropertiesClass,
|
|
73
75
|
GlobalTagsClass,
|
|
76
|
+
OwnerClass,
|
|
77
|
+
OwnershipClass,
|
|
78
|
+
OwnershipTypeClass,
|
|
74
79
|
TagAssociationClass,
|
|
75
80
|
UpstreamClass,
|
|
76
81
|
UpstreamLineageClass,
|
|
77
82
|
)
|
|
78
83
|
from datahub.utilities import config_clean
|
|
84
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
79
85
|
from datahub.utilities.registries.domain_registry import DomainRegistry
|
|
80
86
|
|
|
81
87
|
logger = logging.getLogger(__name__)
|
|
@@ -103,6 +109,14 @@ chart_type_from_viz_type = {
|
|
|
103
109
|
platform_without_databases = ["druid"]
|
|
104
110
|
|
|
105
111
|
|
|
112
|
+
@dataclass
|
|
113
|
+
class SupersetSourceReport(StaleEntityRemovalSourceReport):
|
|
114
|
+
filtered: LossyList[str] = field(default_factory=LossyList)
|
|
115
|
+
|
|
116
|
+
def report_dropped(self, name: str) -> None:
|
|
117
|
+
self.filtered.append(name)
|
|
118
|
+
|
|
119
|
+
|
|
106
120
|
class SupersetDataset(BaseModel):
|
|
107
121
|
id: int
|
|
108
122
|
table_name: str
|
|
@@ -138,6 +152,18 @@ class SupersetConfig(
|
|
|
138
152
|
default=dict(),
|
|
139
153
|
description="regex patterns for tables to filter to assign domain_key. ",
|
|
140
154
|
)
|
|
155
|
+
dataset_pattern: AllowDenyPattern = Field(
|
|
156
|
+
default=AllowDenyPattern.allow_all(),
|
|
157
|
+
description="Regex patterns for dataset to filter in ingestion.",
|
|
158
|
+
)
|
|
159
|
+
chart_pattern: AllowDenyPattern = Field(
|
|
160
|
+
AllowDenyPattern.allow_all(),
|
|
161
|
+
description="Patterns for selecting chart names that are to be included",
|
|
162
|
+
)
|
|
163
|
+
dashboard_pattern: AllowDenyPattern = Field(
|
|
164
|
+
AllowDenyPattern.allow_all(),
|
|
165
|
+
description="Patterns for selecting dashboard names that are to be included",
|
|
166
|
+
)
|
|
141
167
|
username: Optional[str] = Field(default=None, description="Superset username.")
|
|
142
168
|
password: Optional[str] = Field(default=None, description="Superset password.")
|
|
143
169
|
# Configuration for stateful ingestion
|
|
@@ -218,7 +244,7 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
218
244
|
"""
|
|
219
245
|
|
|
220
246
|
config: SupersetConfig
|
|
221
|
-
report: StaleEntityRemovalSourceReport
|
|
247
|
+
report: SupersetSourceReport
|
|
222
248
|
platform = "superset"
|
|
223
249
|
|
|
224
250
|
def __hash__(self):
|
|
@@ -227,13 +253,14 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
227
253
|
def __init__(self, ctx: PipelineContext, config: SupersetConfig):
|
|
228
254
|
super().__init__(config, ctx)
|
|
229
255
|
self.config = config
|
|
230
|
-
self.report = StaleEntityRemovalSourceReport()
|
|
256
|
+
self.report = SupersetSourceReport()
|
|
231
257
|
if self.config.domain:
|
|
232
258
|
self.domain_registry = DomainRegistry(
|
|
233
259
|
cached_domains=[domain_id for domain_id in self.config.domain],
|
|
234
260
|
graph=self.ctx.graph,
|
|
235
261
|
)
|
|
236
262
|
self.session = self.login()
|
|
263
|
+
self.owner_info = self.parse_owner_info()
|
|
237
264
|
|
|
238
265
|
def login(self) -> requests.Session:
|
|
239
266
|
login_response = requests.post(
|
|
@@ -273,7 +300,7 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
273
300
|
|
|
274
301
|
while current_page * page_size < total_items:
|
|
275
302
|
response = self.session.get(
|
|
276
|
-
f"{self.config.connect_uri}/api/v1/{entity_type}/",
|
|
303
|
+
f"{self.config.connect_uri}/api/v1/{entity_type}",
|
|
277
304
|
params={"q": f"(page:{current_page},page_size:{page_size})"},
|
|
278
305
|
)
|
|
279
306
|
|
|
@@ -289,6 +316,25 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
289
316
|
|
|
290
317
|
current_page += 1
|
|
291
318
|
|
|
319
|
+
def parse_owner_info(self) -> Dict[str, Any]:
|
|
320
|
+
entity_types = ["dataset", "dashboard", "chart"]
|
|
321
|
+
owners_info = {}
|
|
322
|
+
|
|
323
|
+
for entity in entity_types:
|
|
324
|
+
for owner in self.paginate_entity_api_results(f"{entity}/related/owners"):
|
|
325
|
+
owner_id = owner.get("value")
|
|
326
|
+
if owner_id:
|
|
327
|
+
owners_info[owner_id] = owner.get("extra", {}).get("email", "")
|
|
328
|
+
|
|
329
|
+
return owners_info
|
|
330
|
+
|
|
331
|
+
def build_owner_urn(self, data: Dict[str, Any]) -> List[str]:
|
|
332
|
+
return [
|
|
333
|
+
make_user_urn(self.owner_info.get(owner.get("id"), ""))
|
|
334
|
+
for owner in data.get("owners", [])
|
|
335
|
+
if owner.get("id")
|
|
336
|
+
]
|
|
337
|
+
|
|
292
338
|
@lru_cache(maxsize=None)
|
|
293
339
|
def get_dataset_info(self, dataset_id: int) -> dict:
|
|
294
340
|
dataset_response = self.session.get(
|
|
@@ -346,15 +392,16 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
346
392
|
aspects=[Status(removed=False)],
|
|
347
393
|
)
|
|
348
394
|
|
|
349
|
-
modified_actor = f"urn:li:corpuser:{(dashboard_data.get('changed_by') or {}).get('
|
|
395
|
+
modified_actor = f"urn:li:corpuser:{self.owner_info.get((dashboard_data.get('changed_by') or {}).get('id', -1), 'unknown')}"
|
|
350
396
|
modified_ts = int(
|
|
351
397
|
dp.parse(dashboard_data.get("changed_on_utc", "now")).timestamp() * 1000
|
|
352
398
|
)
|
|
353
399
|
title = dashboard_data.get("dashboard_title", "")
|
|
354
400
|
# note: the API does not currently supply created_by usernames due to a bug
|
|
355
|
-
last_modified =
|
|
356
|
-
|
|
357
|
-
|
|
401
|
+
last_modified = AuditStampClass(time=modified_ts, actor=modified_actor)
|
|
402
|
+
|
|
403
|
+
change_audit_stamps = ChangeAuditStamps(
|
|
404
|
+
created=None, lastModified=last_modified
|
|
358
405
|
)
|
|
359
406
|
dashboard_url = f"{self.config.display_uri}{dashboard_data.get('url', '')}"
|
|
360
407
|
|
|
@@ -380,7 +427,7 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
380
427
|
"IsPublished": str(dashboard_data.get("published", False)).lower(),
|
|
381
428
|
"Owners": ", ".join(
|
|
382
429
|
map(
|
|
383
|
-
lambda owner: owner.get("
|
|
430
|
+
lambda owner: self.owner_info.get(owner.get("id", -1), "unknown"),
|
|
384
431
|
dashboard_data.get("owners", []),
|
|
385
432
|
)
|
|
386
433
|
),
|
|
@@ -400,16 +447,39 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
400
447
|
description="",
|
|
401
448
|
title=title,
|
|
402
449
|
charts=chart_urns,
|
|
403
|
-
lastModified=last_modified,
|
|
404
450
|
dashboardUrl=dashboard_url,
|
|
405
451
|
customProperties=custom_properties,
|
|
452
|
+
lastModified=change_audit_stamps,
|
|
406
453
|
)
|
|
407
454
|
dashboard_snapshot.aspects.append(dashboard_info)
|
|
455
|
+
|
|
456
|
+
dashboard_owners_list = self.build_owner_urn(dashboard_data)
|
|
457
|
+
owners_info = OwnershipClass(
|
|
458
|
+
owners=[
|
|
459
|
+
OwnerClass(
|
|
460
|
+
owner=urn,
|
|
461
|
+
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
462
|
+
)
|
|
463
|
+
for urn in (dashboard_owners_list or [])
|
|
464
|
+
],
|
|
465
|
+
lastModified=last_modified,
|
|
466
|
+
)
|
|
467
|
+
dashboard_snapshot.aspects.append(owners_info)
|
|
468
|
+
|
|
408
469
|
return dashboard_snapshot
|
|
409
470
|
|
|
410
471
|
def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
|
|
411
|
-
for dashboard_data in self.paginate_entity_api_results("dashboard", PAGE_SIZE):
|
|
472
|
+
for dashboard_data in self.paginate_entity_api_results("dashboard/", PAGE_SIZE):
|
|
412
473
|
try:
|
|
474
|
+
dashboard_id = str(dashboard_data.get("id"))
|
|
475
|
+
dashboard_title = dashboard_data.get("dashboard_title", "")
|
|
476
|
+
|
|
477
|
+
if not self.config.dashboard_pattern.allowed(dashboard_title):
|
|
478
|
+
self.report.report_dropped(
|
|
479
|
+
f"Dashboard '{dashboard_title}' (id: {dashboard_id}) filtered by dashboard_pattern"
|
|
480
|
+
)
|
|
481
|
+
continue
|
|
482
|
+
|
|
413
483
|
dashboard_snapshot = self.construct_dashboard_from_api_data(
|
|
414
484
|
dashboard_data
|
|
415
485
|
)
|
|
@@ -422,7 +492,7 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
422
492
|
mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
|
|
423
493
|
yield MetadataWorkUnit(id=dashboard_snapshot.urn, mce=mce)
|
|
424
494
|
yield from self._get_domain_wu(
|
|
425
|
-
title=
|
|
495
|
+
title=dashboard_title,
|
|
426
496
|
entity_urn=dashboard_snapshot.urn,
|
|
427
497
|
)
|
|
428
498
|
|
|
@@ -437,17 +507,19 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
437
507
|
aspects=[Status(removed=False)],
|
|
438
508
|
)
|
|
439
509
|
|
|
440
|
-
modified_actor = f"urn:li:corpuser:{(chart_data.get('changed_by') or {}).get('
|
|
510
|
+
modified_actor = f"urn:li:corpuser:{self.owner_info.get((chart_data.get('changed_by') or {}).get('id', -1), 'unknown')}"
|
|
441
511
|
modified_ts = int(
|
|
442
512
|
dp.parse(chart_data.get("changed_on_utc", "now")).timestamp() * 1000
|
|
443
513
|
)
|
|
444
514
|
title = chart_data.get("slice_name", "")
|
|
445
515
|
|
|
446
516
|
# note: the API does not currently supply created_by usernames due to a bug
|
|
447
|
-
last_modified =
|
|
448
|
-
|
|
449
|
-
|
|
517
|
+
last_modified = AuditStampClass(time=modified_ts, actor=modified_actor)
|
|
518
|
+
|
|
519
|
+
change_audit_stamps = ChangeAuditStamps(
|
|
520
|
+
created=None, lastModified=last_modified
|
|
450
521
|
)
|
|
522
|
+
|
|
451
523
|
chart_type = chart_type_from_viz_type.get(chart_data.get("viz_type", ""))
|
|
452
524
|
chart_url = f"{self.config.display_uri}{chart_data.get('url', '')}"
|
|
453
525
|
|
|
@@ -504,23 +576,61 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
504
576
|
type=chart_type,
|
|
505
577
|
description="",
|
|
506
578
|
title=title,
|
|
507
|
-
lastModified=last_modified,
|
|
508
579
|
chartUrl=chart_url,
|
|
509
580
|
inputs=[datasource_urn] if datasource_urn else None,
|
|
510
581
|
customProperties=custom_properties,
|
|
582
|
+
lastModified=change_audit_stamps,
|
|
511
583
|
)
|
|
512
584
|
chart_snapshot.aspects.append(chart_info)
|
|
585
|
+
|
|
586
|
+
chart_owners_list = self.build_owner_urn(chart_data)
|
|
587
|
+
owners_info = OwnershipClass(
|
|
588
|
+
owners=[
|
|
589
|
+
OwnerClass(
|
|
590
|
+
owner=urn,
|
|
591
|
+
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
592
|
+
)
|
|
593
|
+
for urn in (chart_owners_list or [])
|
|
594
|
+
],
|
|
595
|
+
lastModified=last_modified,
|
|
596
|
+
)
|
|
597
|
+
chart_snapshot.aspects.append(owners_info)
|
|
513
598
|
return chart_snapshot
|
|
514
599
|
|
|
515
600
|
def emit_chart_mces(self) -> Iterable[MetadataWorkUnit]:
|
|
516
|
-
for chart_data in self.paginate_entity_api_results("chart", PAGE_SIZE):
|
|
601
|
+
for chart_data in self.paginate_entity_api_results("chart/", PAGE_SIZE):
|
|
517
602
|
try:
|
|
603
|
+
chart_id = str(chart_data.get("id"))
|
|
604
|
+
chart_name = chart_data.get("slice_name", "")
|
|
605
|
+
|
|
606
|
+
if not self.config.chart_pattern.allowed(chart_name):
|
|
607
|
+
self.report.report_dropped(
|
|
608
|
+
f"Chart '{chart_name}' (id: {chart_id}) filtered by chart_pattern"
|
|
609
|
+
)
|
|
610
|
+
continue
|
|
611
|
+
|
|
612
|
+
# Emit a warning if charts use data from a dataset that will be filtered out
|
|
613
|
+
if self.config.dataset_pattern != AllowDenyPattern.allow_all():
|
|
614
|
+
datasource_id = chart_data.get("datasource_id")
|
|
615
|
+
if datasource_id:
|
|
616
|
+
dataset_response = self.get_dataset_info(datasource_id)
|
|
617
|
+
dataset_name = dataset_response.get("result", {}).get(
|
|
618
|
+
"table_name", ""
|
|
619
|
+
)
|
|
620
|
+
|
|
621
|
+
if dataset_name and not self.config.dataset_pattern.allowed(
|
|
622
|
+
dataset_name
|
|
623
|
+
):
|
|
624
|
+
self.report.warning(
|
|
625
|
+
f"Chart '{chart_name}' (id: {chart_id}) uses dataset '{dataset_name}' which is filtered by dataset_pattern"
|
|
626
|
+
)
|
|
627
|
+
|
|
518
628
|
chart_snapshot = self.construct_chart_from_chart_data(chart_data)
|
|
519
629
|
|
|
520
630
|
mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
|
|
521
631
|
except Exception as e:
|
|
522
632
|
self.report.warning(
|
|
523
|
-
f"Failed to construct chart snapshot. Chart name: {
|
|
633
|
+
f"Failed to construct chart snapshot. Chart name: {chart_name}. Error: \n{e}"
|
|
524
634
|
)
|
|
525
635
|
continue
|
|
526
636
|
# Emit the chart
|
|
@@ -583,6 +693,12 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
583
693
|
)
|
|
584
694
|
dataset_url = f"{self.config.display_uri}{dataset_response.get('result', {}).get('url', '')}"
|
|
585
695
|
|
|
696
|
+
modified_actor = f"urn:li:corpuser:{self.owner_info.get((dataset_data.get('changed_by') or {}).get('id', -1), 'unknown')}"
|
|
697
|
+
modified_ts = int(
|
|
698
|
+
dp.parse(dataset_data.get("changed_on_utc", "now")).timestamp() * 1000
|
|
699
|
+
)
|
|
700
|
+
last_modified = AuditStampClass(time=modified_ts, actor=modified_actor)
|
|
701
|
+
|
|
586
702
|
upstream_warehouse_platform = (
|
|
587
703
|
dataset_response.get("result", {}).get("database", {}).get("backend")
|
|
588
704
|
)
|
|
@@ -618,10 +734,8 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
618
734
|
dataset_info = DatasetPropertiesClass(
|
|
619
735
|
name=dataset.table_name,
|
|
620
736
|
description="",
|
|
621
|
-
lastModified=(
|
|
622
|
-
TimeStamp(time=dataset.modified_ts) if dataset.modified_ts else None
|
|
623
|
-
),
|
|
624
737
|
externalUrl=dataset_url,
|
|
738
|
+
lastModified=TimeStamp(time=modified_ts),
|
|
625
739
|
)
|
|
626
740
|
global_tags = GlobalTagsClass(tags=[TagAssociationClass(tag=tag_urn)])
|
|
627
741
|
|
|
@@ -640,13 +754,33 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
640
754
|
aspects=aspects_items,
|
|
641
755
|
)
|
|
642
756
|
|
|
643
|
-
|
|
757
|
+
dataset_owners_list = self.build_owner_urn(dataset_data)
|
|
758
|
+
owners_info = OwnershipClass(
|
|
759
|
+
owners=[
|
|
760
|
+
OwnerClass(
|
|
761
|
+
owner=urn,
|
|
762
|
+
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
763
|
+
)
|
|
764
|
+
for urn in (dataset_owners_list or [])
|
|
765
|
+
],
|
|
766
|
+
lastModified=last_modified,
|
|
767
|
+
)
|
|
768
|
+
aspects_items.append(owners_info)
|
|
644
769
|
|
|
645
770
|
return dataset_snapshot
|
|
646
771
|
|
|
647
772
|
def emit_dataset_mces(self) -> Iterable[MetadataWorkUnit]:
|
|
648
|
-
for dataset_data in self.paginate_entity_api_results("dataset", PAGE_SIZE):
|
|
773
|
+
for dataset_data in self.paginate_entity_api_results("dataset/", PAGE_SIZE):
|
|
649
774
|
try:
|
|
775
|
+
dataset_name = dataset_data.get("table_name", "")
|
|
776
|
+
|
|
777
|
+
# Check if dataset should be filtered by dataset name
|
|
778
|
+
if not self.config.dataset_pattern.allowed(dataset_name):
|
|
779
|
+
self.report.report_dropped(
|
|
780
|
+
f"Dataset '{dataset_name}' filtered by dataset_pattern"
|
|
781
|
+
)
|
|
782
|
+
continue
|
|
783
|
+
|
|
650
784
|
dataset_snapshot = self.construct_dataset_from_dataset_data(
|
|
651
785
|
dataset_data
|
|
652
786
|
)
|