acryl-datahub 0.15.0.2rc4__py3-none-any.whl → 0.15.0.2rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/METADATA +2578 -2578
- {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/RECORD +49 -45
- datahub/__init__.py +1 -1
- datahub/cli/delete_cli.py +3 -3
- datahub/cli/migrate.py +2 -2
- datahub/emitter/mcp_builder.py +27 -0
- datahub/emitter/rest_emitter.py +1 -1
- datahub/ingestion/api/source.py +2 -2
- datahub/ingestion/source/delta_lake/source.py +0 -5
- datahub/ingestion/source/demo_data.py +1 -1
- datahub/ingestion/source/fivetran/fivetran.py +1 -6
- datahub/ingestion/source/iceberg/iceberg.py +10 -3
- datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
- datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
- datahub/ingestion/source/kafka_connect/kafka_connect.py +1 -6
- datahub/ingestion/source/metabase.py +1 -6
- datahub/ingestion/source/mlflow.py +0 -5
- datahub/ingestion/source/nifi.py +0 -5
- datahub/ingestion/source/redash.py +0 -5
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +5 -2
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -20
- datahub/ingestion/source/snowflake/snowflake_tag.py +14 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +0 -6
- datahub/ingestion/source/sql/sql_types.py +1 -1
- datahub/ingestion/source/sql/sql_utils.py +5 -0
- datahub/ingestion/source/superset.py +1 -6
- datahub/ingestion/source/tableau/tableau.py +0 -6
- datahub/metadata/_schema_classes.py +314 -41
- datahub/metadata/_urns/urn_defs.py +54 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
- datahub/metadata/schema.avsc +296 -87
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLModelProperties.avsc +96 -48
- datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
- datahub/metadata/schemas/VersionProperties.avsc +216 -0
- datahub/metadata/schemas/VersionSetKey.avsc +26 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
- {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/top_level.txt +0 -0
|
@@ -17,7 +17,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
17
17
|
platform_name,
|
|
18
18
|
support_status,
|
|
19
19
|
)
|
|
20
|
-
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
|
|
20
|
+
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
|
|
21
21
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
22
22
|
from datahub.ingestion.source.kafka_connect.common import (
|
|
23
23
|
CONNECTOR_CLASS,
|
|
@@ -94,11 +94,6 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
|
|
|
94
94
|
if not jpype.isJVMStarted():
|
|
95
95
|
jpype.startJVM()
|
|
96
96
|
|
|
97
|
-
@classmethod
|
|
98
|
-
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
|
|
99
|
-
config = KafkaConnectSourceConfig.parse_obj(config_dict)
|
|
100
|
-
return cls(config, ctx)
|
|
101
|
-
|
|
102
97
|
def get_connectors_manifest(self) -> Iterable[ConnectorManifest]:
|
|
103
98
|
"""Get Kafka Connect connectors manifest using REST API.
|
|
104
99
|
Enrich with lineages metadata.
|
|
@@ -23,7 +23,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
23
23
|
platform_name,
|
|
24
24
|
support_status,
|
|
25
25
|
)
|
|
26
|
-
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport
|
|
26
|
+
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
|
|
27
27
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
28
28
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
29
29
|
StaleEntityRemovalHandler,
|
|
@@ -789,11 +789,6 @@ class MetabaseSource(StatefulIngestionSourceBase):
|
|
|
789
789
|
|
|
790
790
|
return platform, dbname, schema, platform_instance
|
|
791
791
|
|
|
792
|
-
@classmethod
|
|
793
|
-
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
|
|
794
|
-
config = MetabaseConfig.parse_obj(config_dict)
|
|
795
|
-
return cls(ctx, config)
|
|
796
|
-
|
|
797
792
|
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|
|
798
793
|
return [
|
|
799
794
|
*super().get_workunit_processors(),
|
datahub/ingestion/source/nifi.py
CHANGED
|
@@ -484,11 +484,6 @@ class NifiSource(Source):
|
|
|
484
484
|
def rest_api_base_url(self):
|
|
485
485
|
return self.config.site_url[: -len("nifi/")] + "nifi-api/"
|
|
486
486
|
|
|
487
|
-
@classmethod
|
|
488
|
-
def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
|
|
489
|
-
config = NifiSourceConfig.parse_obj(config_dict)
|
|
490
|
-
return cls(config, ctx)
|
|
491
|
-
|
|
492
487
|
def get_report(self) -> SourceReport:
|
|
493
488
|
return self.report
|
|
494
489
|
|
|
@@ -369,11 +369,6 @@ class RedashSource(Source):
|
|
|
369
369
|
else:
|
|
370
370
|
raise ValueError(f"Failed to connect to {self.config.connect_uri}/api")
|
|
371
371
|
|
|
372
|
-
@classmethod
|
|
373
|
-
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
|
|
374
|
-
config = RedashConfig.parse_obj(config_dict)
|
|
375
|
-
return cls(ctx, config)
|
|
376
|
-
|
|
377
372
|
def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
|
|
378
373
|
url = f"/api/data_sources/{data_source_id}"
|
|
379
374
|
resp = self.client._get(url).json()
|
|
@@ -244,6 +244,11 @@ class SnowflakeV2Config(
|
|
|
244
244
|
description="""Optional. Allowed values are `without_lineage`, `with_lineage`, and `skip` (default). `without_lineage` only extracts tags that have been applied directly to the given entity. `with_lineage` extracts both directly applied and propagated tags, but will be significantly slower. See the [Snowflake documentation](https://docs.snowflake.com/en/user-guide/object-tagging.html#tag-lineage) for information about tag lineage/propagation. """,
|
|
245
245
|
)
|
|
246
246
|
|
|
247
|
+
extract_tags_as_structured_properties: bool = Field(
|
|
248
|
+
default=False,
|
|
249
|
+
description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.",
|
|
250
|
+
)
|
|
251
|
+
|
|
247
252
|
include_external_url: bool = Field(
|
|
248
253
|
default=True,
|
|
249
254
|
description="Whether to populate Snowsight url for Snowflake Objects",
|
|
@@ -263,6 +268,14 @@ class SnowflakeV2Config(
|
|
|
263
268
|
description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.",
|
|
264
269
|
)
|
|
265
270
|
|
|
271
|
+
structured_property_pattern: AllowDenyPattern = Field(
|
|
272
|
+
default=AllowDenyPattern.allow_all(),
|
|
273
|
+
description=(
|
|
274
|
+
"List of regex patterns for structured properties to include in ingestion."
|
|
275
|
+
" Only used if `extract_tags` and `extract_tags_as_structured_properties` are enabled."
|
|
276
|
+
),
|
|
277
|
+
)
|
|
278
|
+
|
|
266
279
|
# This is required since access_history table does not capture whether the table was temporary table.
|
|
267
280
|
temporary_tables_pattern: List[str] = Field(
|
|
268
281
|
default=DEFAULT_TEMP_TABLES_PATTERNS,
|
|
@@ -45,15 +45,18 @@ class SnowflakeTag:
|
|
|
45
45
|
name: str
|
|
46
46
|
value: str
|
|
47
47
|
|
|
48
|
-
def
|
|
48
|
+
def tag_display_name(self) -> str:
|
|
49
49
|
return f"{self.name}: {self.value}"
|
|
50
50
|
|
|
51
|
-
def
|
|
51
|
+
def tag_identifier(self) -> str:
|
|
52
52
|
return f"{self._id_prefix_as_str()}:{self.value}"
|
|
53
53
|
|
|
54
54
|
def _id_prefix_as_str(self) -> str:
|
|
55
55
|
return f"{self.database}.{self.schema}.{self.name}"
|
|
56
56
|
|
|
57
|
+
def structured_property_identifier(self) -> str:
|
|
58
|
+
return f"snowflake.{self.database}.{self.schema}.{self.name}"
|
|
59
|
+
|
|
57
60
|
|
|
58
61
|
@dataclass
|
|
59
62
|
class SnowflakeColumn(BaseColumn):
|
|
@@ -4,12 +4,14 @@ from typing import Dict, Iterable, List, Optional, Union
|
|
|
4
4
|
|
|
5
5
|
from datahub.configuration.pattern_utils import is_schema_allowed
|
|
6
6
|
from datahub.emitter.mce_builder import (
|
|
7
|
+
get_sys_time,
|
|
7
8
|
make_data_platform_urn,
|
|
8
9
|
make_dataset_urn_with_platform_instance,
|
|
9
10
|
make_schema_field_urn,
|
|
10
11
|
make_tag_urn,
|
|
11
12
|
)
|
|
12
13
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
14
|
+
from datahub.emitter.mcp_builder import add_structured_properties_to_entity_wu
|
|
13
15
|
from datahub.ingestion.api.source import SourceReport
|
|
14
16
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
15
17
|
from datahub.ingestion.glossary.classification_mixin import (
|
|
@@ -72,6 +74,7 @@ from datahub.ingestion.source_report.ingestion_stage import (
|
|
|
72
74
|
PROFILING,
|
|
73
75
|
)
|
|
74
76
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
77
|
+
AuditStamp,
|
|
75
78
|
GlobalTags,
|
|
76
79
|
Status,
|
|
77
80
|
SubTypes,
|
|
@@ -98,7 +101,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
98
101
|
StringType,
|
|
99
102
|
TimeType,
|
|
100
103
|
)
|
|
104
|
+
from datahub.metadata.com.linkedin.pegasus2avro.structured import (
|
|
105
|
+
StructuredPropertyDefinition,
|
|
106
|
+
)
|
|
101
107
|
from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties
|
|
108
|
+
from datahub.metadata.urns import (
|
|
109
|
+
ContainerUrn,
|
|
110
|
+
DatasetUrn,
|
|
111
|
+
DataTypeUrn,
|
|
112
|
+
EntityTypeUrn,
|
|
113
|
+
SchemaFieldUrn,
|
|
114
|
+
StructuredPropertyUrn,
|
|
115
|
+
)
|
|
102
116
|
from datahub.sql_parsing.sql_parsing_aggregator import (
|
|
103
117
|
KnownLineageMapping,
|
|
104
118
|
SqlParsingAggregator,
|
|
@@ -673,14 +687,31 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
673
687
|
yield from self.gen_dataset_workunits(view, schema_name, db_name)
|
|
674
688
|
|
|
675
689
|
def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
|
|
676
|
-
|
|
690
|
+
use_sp = self.config.extract_tags_as_structured_properties
|
|
691
|
+
identifier = (
|
|
692
|
+
self.snowflake_identifier(tag.structured_property_identifier())
|
|
693
|
+
if use_sp
|
|
694
|
+
else tag.tag_identifier()
|
|
695
|
+
)
|
|
677
696
|
|
|
678
|
-
if self.report.is_tag_processed(
|
|
697
|
+
if self.report.is_tag_processed(identifier):
|
|
679
698
|
return
|
|
680
699
|
|
|
681
|
-
self.report.report_tag_processed(
|
|
682
|
-
|
|
683
|
-
|
|
700
|
+
self.report.report_tag_processed(identifier)
|
|
701
|
+
if use_sp:
|
|
702
|
+
yield from self.gen_tag_as_structured_property_workunits(tag)
|
|
703
|
+
else:
|
|
704
|
+
yield from self.gen_tag_workunits(tag)
|
|
705
|
+
|
|
706
|
+
def _format_tags_as_structured_properties(
|
|
707
|
+
self, tags: List[SnowflakeTag]
|
|
708
|
+
) -> Dict[StructuredPropertyUrn, str]:
|
|
709
|
+
return {
|
|
710
|
+
StructuredPropertyUrn(
|
|
711
|
+
self.snowflake_identifier(tag.structured_property_identifier())
|
|
712
|
+
): tag.value
|
|
713
|
+
for tag in tags
|
|
714
|
+
}
|
|
684
715
|
|
|
685
716
|
def gen_dataset_workunits(
|
|
686
717
|
self,
|
|
@@ -725,6 +756,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
725
756
|
env=self.config.env,
|
|
726
757
|
)
|
|
727
758
|
|
|
759
|
+
if self.config.extract_tags_as_structured_properties:
|
|
760
|
+
yield from self.gen_column_tags_as_structured_properties(dataset_urn, table)
|
|
761
|
+
|
|
728
762
|
yield from add_table_to_schema_container(
|
|
729
763
|
dataset_urn=dataset_urn,
|
|
730
764
|
parent_container_key=schema_container_key,
|
|
@@ -758,16 +792,24 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
758
792
|
)
|
|
759
793
|
|
|
760
794
|
if table.tags:
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
795
|
+
if self.config.extract_tags_as_structured_properties:
|
|
796
|
+
yield from add_structured_properties_to_entity_wu(
|
|
797
|
+
dataset_urn,
|
|
798
|
+
self._format_tags_as_structured_properties(table.tags),
|
|
764
799
|
)
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
800
|
+
else:
|
|
801
|
+
tag_associations = [
|
|
802
|
+
TagAssociation(
|
|
803
|
+
tag=make_tag_urn(
|
|
804
|
+
self.snowflake_identifier(tag.tag_identifier())
|
|
805
|
+
)
|
|
806
|
+
)
|
|
807
|
+
for tag in table.tags
|
|
808
|
+
]
|
|
809
|
+
global_tags = GlobalTags(tag_associations)
|
|
810
|
+
yield MetadataChangeProposalWrapper(
|
|
811
|
+
entityUrn=dataset_urn, aspect=global_tags
|
|
812
|
+
).as_workunit()
|
|
771
813
|
|
|
772
814
|
if isinstance(table, SnowflakeView) and table.view_definition is not None:
|
|
773
815
|
view_properties_aspect = ViewProperties(
|
|
@@ -840,10 +882,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
840
882
|
)
|
|
841
883
|
|
|
842
884
|
def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
|
|
843
|
-
tag_urn = make_tag_urn(self.snowflake_identifier(tag.
|
|
885
|
+
tag_urn = make_tag_urn(self.snowflake_identifier(tag.tag_identifier()))
|
|
844
886
|
|
|
845
887
|
tag_properties_aspect = TagProperties(
|
|
846
|
-
name=tag.
|
|
888
|
+
name=tag.tag_display_name(),
|
|
847
889
|
description=f"Represents the Snowflake tag `{tag._id_prefix_as_str()}` with value `{tag.value}`.",
|
|
848
890
|
)
|
|
849
891
|
|
|
@@ -851,6 +893,41 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
851
893
|
entityUrn=tag_urn, aspect=tag_properties_aspect
|
|
852
894
|
).as_workunit()
|
|
853
895
|
|
|
896
|
+
def gen_tag_as_structured_property_workunits(
|
|
897
|
+
self, tag: SnowflakeTag
|
|
898
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
899
|
+
identifier = self.snowflake_identifier(tag.structured_property_identifier())
|
|
900
|
+
urn = StructuredPropertyUrn(identifier).urn()
|
|
901
|
+
aspect = StructuredPropertyDefinition(
|
|
902
|
+
qualifiedName=identifier,
|
|
903
|
+
displayName=tag.name,
|
|
904
|
+
valueType=DataTypeUrn("datahub.string").urn(),
|
|
905
|
+
entityTypes=[
|
|
906
|
+
EntityTypeUrn(f"datahub.{ContainerUrn.ENTITY_TYPE}").urn(),
|
|
907
|
+
EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn(),
|
|
908
|
+
EntityTypeUrn(f"datahub.{SchemaFieldUrn.ENTITY_TYPE}").urn(),
|
|
909
|
+
],
|
|
910
|
+
lastModified=AuditStamp(
|
|
911
|
+
time=get_sys_time(), actor="urn:li:corpuser:datahub"
|
|
912
|
+
),
|
|
913
|
+
)
|
|
914
|
+
yield MetadataChangeProposalWrapper(
|
|
915
|
+
entityUrn=urn,
|
|
916
|
+
aspect=aspect,
|
|
917
|
+
).as_workunit()
|
|
918
|
+
|
|
919
|
+
def gen_column_tags_as_structured_properties(
|
|
920
|
+
self, dataset_urn: str, table: Union[SnowflakeTable, SnowflakeView]
|
|
921
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
922
|
+
for column_name in table.column_tags:
|
|
923
|
+
schema_field_urn = SchemaFieldUrn(dataset_urn, column_name).urn()
|
|
924
|
+
yield from add_structured_properties_to_entity_wu(
|
|
925
|
+
schema_field_urn,
|
|
926
|
+
self._format_tags_as_structured_properties(
|
|
927
|
+
table.column_tags[column_name]
|
|
928
|
+
),
|
|
929
|
+
)
|
|
930
|
+
|
|
854
931
|
def gen_schema_metadata(
|
|
855
932
|
self,
|
|
856
933
|
table: Union[SnowflakeTable, SnowflakeView],
|
|
@@ -892,13 +969,14 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
892
969
|
[
|
|
893
970
|
TagAssociation(
|
|
894
971
|
make_tag_urn(
|
|
895
|
-
self.snowflake_identifier(tag.
|
|
972
|
+
self.snowflake_identifier(tag.tag_identifier())
|
|
896
973
|
)
|
|
897
974
|
)
|
|
898
975
|
for tag in table.column_tags[col.name]
|
|
899
976
|
]
|
|
900
977
|
)
|
|
901
978
|
if col.name in table.column_tags
|
|
979
|
+
and not self.config.extract_tags_as_structured_properties
|
|
902
980
|
else None
|
|
903
981
|
),
|
|
904
982
|
)
|
|
@@ -985,8 +1063,17 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
985
1063
|
)
|
|
986
1064
|
),
|
|
987
1065
|
tags=(
|
|
988
|
-
[
|
|
1066
|
+
[
|
|
1067
|
+
self.snowflake_identifier(tag.tag_identifier())
|
|
1068
|
+
for tag in database.tags
|
|
1069
|
+
]
|
|
989
1070
|
if database.tags
|
|
1071
|
+
and not self.config.extract_tags_as_structured_properties
|
|
1072
|
+
else None
|
|
1073
|
+
),
|
|
1074
|
+
structured_properties=(
|
|
1075
|
+
self._format_tags_as_structured_properties(database.tags)
|
|
1076
|
+
if database.tags and self.config.extract_tags_as_structured_properties
|
|
990
1077
|
else None
|
|
991
1078
|
),
|
|
992
1079
|
)
|
|
@@ -1038,8 +1125,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
1038
1125
|
else None
|
|
1039
1126
|
),
|
|
1040
1127
|
tags=(
|
|
1041
|
-
[self.snowflake_identifier(tag.
|
|
1042
|
-
if schema.tags
|
|
1128
|
+
[self.snowflake_identifier(tag.tag_identifier()) for tag in schema.tags]
|
|
1129
|
+
if schema.tags and not self.config.extract_tags_as_structured_properties
|
|
1130
|
+
else None
|
|
1131
|
+
),
|
|
1132
|
+
structured_properties=(
|
|
1133
|
+
self._format_tags_as_structured_properties(schema.tags)
|
|
1134
|
+
if schema.tags and self.config.extract_tags_as_structured_properties
|
|
1043
1135
|
else None
|
|
1044
1136
|
),
|
|
1045
1137
|
)
|
|
@@ -165,10 +165,20 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
|
|
|
165
165
|
|
|
166
166
|
allowed_tags = []
|
|
167
167
|
for tag in tags:
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
168
|
+
identifier = (
|
|
169
|
+
tag._id_prefix_as_str()
|
|
170
|
+
if self.config.extract_tags_as_structured_properties
|
|
171
|
+
else tag.tag_identifier()
|
|
172
|
+
)
|
|
173
|
+
self.report.report_entity_scanned(identifier, "tag")
|
|
174
|
+
|
|
175
|
+
pattern = (
|
|
176
|
+
self.config.structured_property_pattern
|
|
177
|
+
if self.config.extract_tags_as_structured_properties
|
|
178
|
+
else self.config.tag_pattern
|
|
179
|
+
)
|
|
180
|
+
if not pattern.allowed(identifier):
|
|
181
|
+
self.report.report_dropped(identifier)
|
|
172
182
|
else:
|
|
173
183
|
allowed_tags.append(tag)
|
|
174
184
|
return allowed_tags
|
|
@@ -23,7 +23,6 @@ from datahub.ingestion.api.incremental_properties_helper import (
|
|
|
23
23
|
from datahub.ingestion.api.source import (
|
|
24
24
|
CapabilityReport,
|
|
25
25
|
MetadataWorkUnitProcessor,
|
|
26
|
-
Source,
|
|
27
26
|
SourceCapability,
|
|
28
27
|
SourceReport,
|
|
29
28
|
TestableSource,
|
|
@@ -251,11 +250,6 @@ class SnowflakeV2Source(
|
|
|
251
250
|
|
|
252
251
|
self.add_config_to_report()
|
|
253
252
|
|
|
254
|
-
@classmethod
|
|
255
|
-
def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
|
|
256
|
-
config = SnowflakeV2Config.parse_obj(config_dict)
|
|
257
|
-
return cls(ctx, config)
|
|
258
|
-
|
|
259
253
|
@staticmethod
|
|
260
254
|
def test_connection(config_dict: dict) -> TestConnectionReport:
|
|
261
255
|
test_report = TestConnectionReport()
|
|
@@ -20,6 +20,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
20
20
|
from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage
|
|
21
21
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
|
|
22
22
|
from datahub.metadata.schema_classes import DataPlatformInstanceClass
|
|
23
|
+
from datahub.metadata.urns import StructuredPropertyUrn
|
|
23
24
|
from datahub.utilities.registries.domain_registry import DomainRegistry
|
|
24
25
|
from datahub.utilities.urns.dataset_urn import DatasetUrn
|
|
25
26
|
|
|
@@ -75,6 +76,7 @@ def gen_schema_container(
|
|
|
75
76
|
created: Optional[int] = None,
|
|
76
77
|
last_modified: Optional[int] = None,
|
|
77
78
|
extra_properties: Optional[Dict[str, str]] = None,
|
|
79
|
+
structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None,
|
|
78
80
|
) -> Iterable[MetadataWorkUnit]:
|
|
79
81
|
domain_urn: Optional[str] = None
|
|
80
82
|
if domain_registry:
|
|
@@ -99,6 +101,7 @@ def gen_schema_container(
|
|
|
99
101
|
owner_urn=owner_urn,
|
|
100
102
|
qualified_name=qualified_name,
|
|
101
103
|
extra_properties=extra_properties,
|
|
104
|
+
structured_properties=structured_properties,
|
|
102
105
|
)
|
|
103
106
|
|
|
104
107
|
|
|
@@ -133,6 +136,7 @@ def gen_database_container(
|
|
|
133
136
|
created: Optional[int] = None,
|
|
134
137
|
last_modified: Optional[int] = None,
|
|
135
138
|
extra_properties: Optional[Dict[str, str]] = None,
|
|
139
|
+
structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None,
|
|
136
140
|
) -> Iterable[MetadataWorkUnit]:
|
|
137
141
|
domain_urn: Optional[str] = None
|
|
138
142
|
if domain_registry:
|
|
@@ -154,6 +158,7 @@ def gen_database_container(
|
|
|
154
158
|
owner_urn=owner_urn,
|
|
155
159
|
qualified_name=qualified_name,
|
|
156
160
|
extra_properties=extra_properties,
|
|
161
|
+
structured_properties=structured_properties,
|
|
157
162
|
)
|
|
158
163
|
|
|
159
164
|
|
|
@@ -33,7 +33,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
33
33
|
platform_name,
|
|
34
34
|
support_status,
|
|
35
35
|
)
|
|
36
|
-
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
|
|
36
|
+
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
|
|
37
37
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
38
38
|
from datahub.ingestion.source.sql.sql_types import resolve_sql_type
|
|
39
39
|
from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
|
|
@@ -265,11 +265,6 @@ class SupersetSource(StatefulIngestionSourceBase):
|
|
|
265
265
|
# TODO(Gabe): how should we message about this error?
|
|
266
266
|
return requests_session
|
|
267
267
|
|
|
268
|
-
@classmethod
|
|
269
|
-
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
|
|
270
|
-
config = SupersetConfig.parse_obj(config_dict)
|
|
271
|
-
return cls(ctx, config)
|
|
272
|
-
|
|
273
268
|
def paginate_entity_api_results(self, entity_type, page_size=100):
|
|
274
269
|
current_page = 0
|
|
275
270
|
total_items = page_size
|
|
@@ -71,7 +71,6 @@ from datahub.ingestion.api.decorators import (
|
|
|
71
71
|
from datahub.ingestion.api.source import (
|
|
72
72
|
CapabilityReport,
|
|
73
73
|
MetadataWorkUnitProcessor,
|
|
74
|
-
Source,
|
|
75
74
|
StructuredLogLevel,
|
|
76
75
|
TestableSource,
|
|
77
76
|
TestConnectionReport,
|
|
@@ -804,11 +803,6 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
804
803
|
def get_report(self) -> TableauSourceReport:
|
|
805
804
|
return self.report
|
|
806
805
|
|
|
807
|
-
@classmethod
|
|
808
|
-
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
|
|
809
|
-
config = TableauConfig.parse_obj(config_dict)
|
|
810
|
-
return cls(config, ctx)
|
|
811
|
-
|
|
812
806
|
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|
|
813
807
|
return [
|
|
814
808
|
*super().get_workunit_processors(),
|