acryl-datahub 0.15.0.2rc4__py3-none-any.whl → 0.15.0.2rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.
Files changed (50)
  1. {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc6.dist-info}/METADATA +2440 -2440
  2. {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc6.dist-info}/RECORD +50 -46
  3. datahub/__init__.py +1 -1
  4. datahub/cli/delete_cli.py +3 -3
  5. datahub/cli/migrate.py +2 -2
  6. datahub/emitter/mcp_builder.py +27 -0
  7. datahub/emitter/rest_emitter.py +1 -1
  8. datahub/ingestion/api/source.py +2 -2
  9. datahub/ingestion/source/delta_lake/source.py +0 -5
  10. datahub/ingestion/source/demo_data.py +1 -1
  11. datahub/ingestion/source/fivetran/fivetran.py +1 -6
  12. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +6 -2
  13. datahub/ingestion/source/iceberg/iceberg.py +10 -3
  14. datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
  15. datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
  16. datahub/ingestion/source/kafka_connect/kafka_connect.py +1 -6
  17. datahub/ingestion/source/metabase.py +1 -6
  18. datahub/ingestion/source/mlflow.py +0 -5
  19. datahub/ingestion/source/nifi.py +0 -5
  20. datahub/ingestion/source/redash.py +0 -5
  21. datahub/ingestion/source/redshift/redshift.py +1 -0
  22. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  23. datahub/ingestion/source/snowflake/snowflake_schema.py +5 -2
  24. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -20
  25. datahub/ingestion/source/snowflake/snowflake_tag.py +14 -4
  26. datahub/ingestion/source/snowflake/snowflake_v2.py +0 -6
  27. datahub/ingestion/source/sql/sql_types.py +1 -1
  28. datahub/ingestion/source/sql/sql_utils.py +5 -0
  29. datahub/ingestion/source/superset.py +1 -6
  30. datahub/ingestion/source/tableau/tableau.py +0 -6
  31. datahub/metadata/_schema_classes.py +314 -41
  32. datahub/metadata/_urns/urn_defs.py +54 -0
  33. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  34. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  35. datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  36. datahub/metadata/schema.avsc +296 -87
  37. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  38. datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
  39. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  40. datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
  41. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  42. datahub/metadata/schemas/MLModelProperties.avsc +96 -48
  43. datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  44. datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
  45. datahub/metadata/schemas/VersionProperties.avsc +216 -0
  46. datahub/metadata/schemas/VersionSetKey.avsc +26 -0
  47. datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
  48. {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc6.dist-info}/WHEEL +0 -0
  49. {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc6.dist-info}/entry_points.txt +0 -0
  50. {acryl_datahub-0.15.0.2rc4.dist-info → acryl_datahub-0.15.0.2rc6.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/iceberg/iceberg_common.py

@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional
 from humanfriendly import format_timespan
 from pydantic import Field, validator
 from pyiceberg.catalog import Catalog, load_catalog
+from sortedcontainers import SortedList

 from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.configuration.source_common import DatasetSourceConfigMixin
@@ -146,19 +147,40 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         return load_catalog(name=catalog_name, **catalog_config)


+class TopTableTimings:
+    _VALUE_FIELD: str = "timing"
+    top_entites: SortedList
+    _size: int
+
+    def __init__(self, size: int = 10):
+        self._size = size
+        self.top_entites = SortedList(key=lambda x: -x.get(self._VALUE_FIELD, 0))
+
+    def add(self, entity: Dict[str, Any]) -> None:
+        if self._VALUE_FIELD not in entity:
+            return
+        self.top_entites.add(entity)
+        if len(self.top_entites) > self._size:
+            self.top_entites.pop()
+
+    def __str__(self) -> str:
+        if len(self.top_entites) == 0:
+            return "no timings reported"
+        return str(list(self.top_entites))
+
+
 class TimingClass:
-    times: List[int]
+    times: SortedList

     def __init__(self):
-        self.times = []
+        self.times = SortedList()

-    def add_timing(self, t):
-        self.times.append(t)
+    def add_timing(self, t: float) -> None:
+        self.times.add(t)

-    def __str__(self):
+    def __str__(self) -> str:
         if len(self.times) == 0:
             return "no timings reported"
-        self.times.sort()
         total = sum(self.times)
         avg = total / len(self.times)
         return str(
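The new TopTableTimings keeps only the N slowest entries: the SortedList orders entry dicts by negated timing (largest first), so once the list grows past `size`, pop() discards the entry with the smallest timing. A minimal standalone sketch of the same bounded top-N pattern (the names here are illustrative, not from the source):

    from sortedcontainers import SortedList

    top = SortedList(key=lambda e: -e["timing"])  # slowest entries sort first
    for name, secs in [("a", 1.2), ("b", 9.5), ("c", 0.3), ("d", 4.1)]:
        top.add({"table": name, "timing": secs})
        if len(top) > 3:  # cap at the 3 slowest (the class above defaults to 10)
            top.pop()     # pop() drops the last element, i.e. the fastest one kept
    print([e["table"] for e in top])  # ['b', 'd', 'a']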
@@ -180,6 +202,9 @@ class IcebergSourceReport(StaleEntityRemovalSourceReport):
     load_table_timings: TimingClass = field(default_factory=TimingClass)
     processing_table_timings: TimingClass = field(default_factory=TimingClass)
     profiling_table_timings: TimingClass = field(default_factory=TimingClass)
+    tables_load_timings: TopTableTimings = field(default_factory=TopTableTimings)
+    tables_profile_timings: TopTableTimings = field(default_factory=TopTableTimings)
+    tables_process_timings: TopTableTimings = field(default_factory=TopTableTimings)
     listed_namespaces: int = 0
     total_listed_tables: int = 0
     tables_listed_per_namespace: TopKDict[str, int] = field(
@@ -201,11 +226,26 @@ class IcebergSourceReport(StaleEntityRemovalSourceReport):
     def report_dropped(self, ent_name: str) -> None:
         self.filtered.append(ent_name)

-    def report_table_load_time(self, t: float) -> None:
+    def report_table_load_time(
+        self, t: float, table_name: str, table_metadata_location: str
+    ) -> None:
         self.load_table_timings.add_timing(t)
+        self.tables_load_timings.add(
+            {"table": table_name, "timing": t, "metadata_file": table_metadata_location}
+        )

-    def report_table_processing_time(self, t: float) -> None:
+    def report_table_processing_time(
+        self, t: float, table_name: str, table_metadata_location: str
+    ) -> None:
         self.processing_table_timings.add_timing(t)
+        self.tables_process_timings.add(
+            {"table": table_name, "timing": t, "metadata_file": table_metadata_location}
+        )

-    def report_table_profiling_time(self, t: float) -> None:
+    def report_table_profiling_time(
+        self, t: float, table_name: str, table_metadata_location: str
+    ) -> None:
         self.profiling_table_timings.add_timing(t)
+        self.tables_profile_timings.add(
+            {"table": table_name, "timing": t, "metadata_file": table_metadata_location}
+        )
datahub/ingestion/source/iceberg/iceberg_profiler.py

@@ -204,7 +204,9 @@ class IcebergProfiler:
             )
             dataset_profile.fieldProfiles.append(column_profile)
         time_taken = timer.elapsed_seconds()
-        self.report.report_table_profiling_time(time_taken)
+        self.report.report_table_profiling_time(
+            time_taken, dataset_name, table.metadata_location
+        )
         LOGGER.debug(
             f"Finished profiling of dataset: {dataset_name} in {time_taken}"
         )
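Call sites now pass the table name and metadata location along with the elapsed time, as in the profiler hunk above. A hedged sketch of the updated reporting call, assuming DataHub's PerfTimer utility and an in-scope IcebergSourceReport named `report` (the `catalog`, `table_id`, and `dataset_name` names are illustrative):

    from datahub.utilities.perf_timer import PerfTimer

    with PerfTimer() as timer:
        table = catalog.load_table(table_id)  # pyiceberg load step
    report.report_table_load_time(
        timer.elapsed_seconds(), dataset_name, table.metadata_location
    )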
datahub/ingestion/source/kafka_connect/kafka_connect.py

@@ -17,7 +17,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.kafka_connect.common import (
     CONNECTOR_CLASS,
@@ -94,11 +94,6 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
         if not jpype.isJVMStarted():
            jpype.startJVM()

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = KafkaConnectSourceConfig.parse_obj(config_dict)
-        return cls(config, ctx)
-
     def get_connectors_manifest(self) -> Iterable[ConnectorManifest]:
         """Get Kafka Connect connectors manifest using REST API.
         Enrich with lineages metadata.
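This hunk and the matching ones below (Metabase, MLflow, NiFi, Redash, Snowflake, Superset, Tableau) delete near-identical per-source create() classmethods, which suggests the Source base class now supplies an equivalent default factory. A hedged sketch of that pattern (the get_config_class helper is an assumption, not shown in this diff; note the removed overrides did not even agree on constructor argument order):

    @classmethod
    def create(cls, config_dict: dict, ctx: "PipelineContext") -> "Source":
        # assumed: each source declares its config class, so a base-class
        # default can parse the dict and construct the source generically
        config = cls.get_config_class().parse_obj(config_dict)
        return cls(config, ctx)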
datahub/ingestion/source/metabase.py

@@ -23,7 +23,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
@@ -789,11 +789,6 @@ class MetabaseSource(StatefulIngestionSourceBase):

         return platform, dbname, schema, platform_instance

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = MetabaseConfig.parse_obj(config_dict)
-        return cls(ctx, config)
-
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
             *super().get_workunit_processors(),
datahub/ingestion/source/mlflow.py

@@ -333,8 +333,3 @@ class MLflowSource(Source):
                 aspect=global_tags,
             )
         return wu
-
-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = MLflowConfig.parse_obj(config_dict)
-        return cls(ctx, config)
datahub/ingestion/source/nifi.py

@@ -484,11 +484,6 @@ class NifiSource(Source):
     def rest_api_base_url(self):
         return self.config.site_url[: -len("nifi/")] + "nifi-api/"

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
-        config = NifiSourceConfig.parse_obj(config_dict)
-        return cls(config, ctx)
-
     def get_report(self) -> SourceReport:
         return self.report

datahub/ingestion/source/redash.py

@@ -369,11 +369,6 @@ class RedashSource(Source):
         else:
             raise ValueError(f"Failed to connect to {self.config.connect_uri}/api")

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = RedashConfig.parse_obj(config_dict)
-        return cls(ctx, config)
-
     def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
         url = f"/api/data_sources/{data_source_id}"
         resp = self.client._get(url).json()
datahub/ingestion/source/redshift/redshift.py

@@ -276,6 +276,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         "HLLSKETCH": NullType,
         "TIMETZ": TimeType,
         "VARBYTE": StringType,
+        "SUPER": NullType,
     }

     def get_platform_instance_id(self) -> str:
datahub/ingestion/source/snowflake/snowflake_config.py

@@ -244,6 +244,11 @@ class SnowflakeV2Config(
         description="""Optional. Allowed values are `without_lineage`, `with_lineage`, and `skip` (default). `without_lineage` only extracts tags that have been applied directly to the given entity. `with_lineage` extracts both directly applied and propagated tags, but will be significantly slower. See the [Snowflake documentation](https://docs.snowflake.com/en/user-guide/object-tagging.html#tag-lineage) for information about tag lineage/propagation. """,
     )

+    extract_tags_as_structured_properties: bool = Field(
+        default=False,
+        description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.",
+    )
+
     include_external_url: bool = Field(
         default=True,
         description="Whether to populate Snowsight url for Snowflake Objects",
@@ -263,6 +268,14 @@ class SnowflakeV2Config(
         description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.",
     )

+    structured_property_pattern: AllowDenyPattern = Field(
+        default=AllowDenyPattern.allow_all(),
+        description=(
+            "List of regex patterns for structured properties to include in ingestion."
+            " Only used if `extract_tags` and `extract_tags_as_structured_properties` are enabled."
+        ),
+    )
+
     # This is required since access_history table does not capture whether the table was temporary table.
     temporary_tables_pattern: List[str] = Field(
         default=DEFAULT_TEMP_TABLES_PATTERNS,
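The new structured_property_pattern is an AllowDenyPattern matched against the value-free `database.schema.tag_name` identifier (see the SnowflakeTagExtractor hunk further down), rather than the `name:value` form used by tag_pattern. A small sketch of how such a filter behaves (the regexes are illustrative):

    from datahub.configuration.common import AllowDenyPattern

    pattern = AllowDenyPattern(allow=[r"analytics\..*"], deny=[r".*\.internal_.*"])
    pattern.allowed("analytics.public.cost_center")    # True
    pattern.allowed("analytics.public.internal_flag")  # False (denied)
    pattern.allowed("raw.public.cost_center")          # False (not allowed)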
datahub/ingestion/source/snowflake/snowflake_schema.py

@@ -45,15 +45,18 @@ class SnowflakeTag:
     name: str
     value: str

-    def display_name(self) -> str:
+    def tag_display_name(self) -> str:
         return f"{self.name}: {self.value}"

-    def identifier(self) -> str:
+    def tag_identifier(self) -> str:
         return f"{self._id_prefix_as_str()}:{self.value}"

     def _id_prefix_as_str(self) -> str:
         return f"{self.database}.{self.schema}.{self.name}"

+    def structured_property_identifier(self) -> str:
+        return f"snowflake.{self.database}.{self.schema}.{self.name}"
+

 @dataclass
 class SnowflakeColumn(BaseColumn):
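The renames separate two identifier formats: tag_identifier() keeps the value-bearing `db.schema.name:value` form used to mint DataHub tag URNs, while the new structured_property_identifier() yields a value-free `snowflake.db.schema.name` form usable as a structured-property qualified name. A sketch, assuming only the dataclass fields shown above (the tag values are illustrative):

    tag = SnowflakeTag(database="ANALYTICS", schema="PUBLIC", name="PII", value="high")
    tag.tag_identifier()                  # 'ANALYTICS.PUBLIC.PII:high'
    tag.structured_property_identifier()  # 'snowflake.ANALYTICS.PUBLIC.PII'

The schema generator additionally passes both forms through snowflake_identifier(), which can normalize casing depending on configuration.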
datahub/ingestion/source/snowflake/snowflake_schema_gen.py

@@ -4,12 +4,14 @@ from typing import Dict, Iterable, List, Optional, Union

 from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import (
+    get_sys_time,
     make_data_platform_urn,
     make_dataset_urn_with_platform_instance,
     make_schema_field_urn,
     make_tag_urn,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.mcp_builder import add_structured_properties_to_entity_wu
 from datahub.ingestion.api.source import SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.glossary.classification_mixin import (
@@ -72,6 +74,7 @@ from datahub.ingestion.source_report.ingestion_stage import (
     PROFILING,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
+    AuditStamp,
     GlobalTags,
     Status,
     SubTypes,
@@ -98,7 +101,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     StringType,
     TimeType,
 )
+from datahub.metadata.com.linkedin.pegasus2avro.structured import (
+    StructuredPropertyDefinition,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties
+from datahub.metadata.urns import (
+    ContainerUrn,
+    DatasetUrn,
+    DataTypeUrn,
+    EntityTypeUrn,
+    SchemaFieldUrn,
+    StructuredPropertyUrn,
+)
 from datahub.sql_parsing.sql_parsing_aggregator import (
     KnownLineageMapping,
     SqlParsingAggregator,
@@ -673,14 +687,31 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             yield from self.gen_dataset_workunits(view, schema_name, db_name)

     def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
-        tag_identifier = tag.identifier()
+        use_sp = self.config.extract_tags_as_structured_properties
+        identifier = (
+            self.snowflake_identifier(tag.structured_property_identifier())
+            if use_sp
+            else tag.tag_identifier()
+        )

-        if self.report.is_tag_processed(tag_identifier):
+        if self.report.is_tag_processed(identifier):
             return

-        self.report.report_tag_processed(tag_identifier)
-
-        yield from self.gen_tag_workunits(tag)
+        self.report.report_tag_processed(identifier)
+        if use_sp:
+            yield from self.gen_tag_as_structured_property_workunits(tag)
+        else:
+            yield from self.gen_tag_workunits(tag)
+
+    def _format_tags_as_structured_properties(
+        self, tags: List[SnowflakeTag]
+    ) -> Dict[StructuredPropertyUrn, str]:
+        return {
+            StructuredPropertyUrn(
+                self.snowflake_identifier(tag.structured_property_identifier())
+            ): tag.value
+            for tag in tags
+        }

     def gen_dataset_workunits(
         self,
@@ -725,6 +756,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             env=self.config.env,
         )

+        if self.config.extract_tags_as_structured_properties:
+            yield from self.gen_column_tags_as_structured_properties(dataset_urn, table)
+
         yield from add_table_to_schema_container(
             dataset_urn=dataset_urn,
             parent_container_key=schema_container_key,
@@ -758,16 +792,24 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         )

         if table.tags:
-            tag_associations = [
-                TagAssociation(
-                    tag=make_tag_urn(self.snowflake_identifier(tag.identifier()))
+            if self.config.extract_tags_as_structured_properties:
+                yield from add_structured_properties_to_entity_wu(
+                    dataset_urn,
+                    self._format_tags_as_structured_properties(table.tags),
                 )
-                for tag in table.tags
-            ]
-            global_tags = GlobalTags(tag_associations)
-            yield MetadataChangeProposalWrapper(
-                entityUrn=dataset_urn, aspect=global_tags
-            ).as_workunit()
+            else:
+                tag_associations = [
+                    TagAssociation(
+                        tag=make_tag_urn(
+                            self.snowflake_identifier(tag.tag_identifier())
+                        )
+                    )
+                    for tag in table.tags
+                ]
+                global_tags = GlobalTags(tag_associations)
+                yield MetadataChangeProposalWrapper(
+                    entityUrn=dataset_urn, aspect=global_tags
+                ).as_workunit()

         if isinstance(table, SnowflakeView) and table.view_definition is not None:
             view_properties_aspect = ViewProperties(
@@ -840,10 +882,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         )

     def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
-        tag_urn = make_tag_urn(self.snowflake_identifier(tag.identifier()))
+        tag_urn = make_tag_urn(self.snowflake_identifier(tag.tag_identifier()))

         tag_properties_aspect = TagProperties(
-            name=tag.display_name(),
+            name=tag.tag_display_name(),
             description=f"Represents the Snowflake tag `{tag._id_prefix_as_str()}` with value `{tag.value}`.",
         )

@@ -851,6 +893,41 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             entityUrn=tag_urn, aspect=tag_properties_aspect
         ).as_workunit()

+    def gen_tag_as_structured_property_workunits(
+        self, tag: SnowflakeTag
+    ) -> Iterable[MetadataWorkUnit]:
+        identifier = self.snowflake_identifier(tag.structured_property_identifier())
+        urn = StructuredPropertyUrn(identifier).urn()
+        aspect = StructuredPropertyDefinition(
+            qualifiedName=identifier,
+            displayName=tag.name,
+            valueType=DataTypeUrn("datahub.string").urn(),
+            entityTypes=[
+                EntityTypeUrn(f"datahub.{ContainerUrn.ENTITY_TYPE}").urn(),
+                EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn(),
+                EntityTypeUrn(f"datahub.{SchemaFieldUrn.ENTITY_TYPE}").urn(),
+            ],
+            lastModified=AuditStamp(
+                time=get_sys_time(), actor="urn:li:corpuser:datahub"
+            ),
+        )
+        yield MetadataChangeProposalWrapper(
+            entityUrn=urn,
+            aspect=aspect,
+        ).as_workunit()
+
+    def gen_column_tags_as_structured_properties(
+        self, dataset_urn: str, table: Union[SnowflakeTable, SnowflakeView]
+    ) -> Iterable[MetadataWorkUnit]:
+        for column_name in table.column_tags:
+            schema_field_urn = SchemaFieldUrn(dataset_urn, column_name).urn()
+            yield from add_structured_properties_to_entity_wu(
+                schema_field_urn,
+                self._format_tags_as_structured_properties(
+                    table.column_tags[column_name]
+                ),
+            )
+
     def gen_schema_metadata(
         self,
         table: Union[SnowflakeTable, SnowflakeView],
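Each distinct Snowflake tag key thus becomes a single string-valued StructuredPropertyDefinition scoped to containers, datasets, and schema fields, while each tagged entity receives the tag's value via add_structured_properties_to_entity_wu. A small sketch of the URN minted above (the identifier is illustrative; lowercasing by snowflake_identifier is assumed):

    from datahub.metadata.urns import StructuredPropertyUrn

    urn = StructuredPropertyUrn("snowflake.analytics.public.pii").urn()
    # 'urn:li:structuredProperty:snowflake.analytics.public.pii'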
datahub/ingestion/source/snowflake/snowflake_schema_gen.py (continued)

@@ -892,13 +969,14 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 [
                     TagAssociation(
                         make_tag_urn(
-                            self.snowflake_identifier(tag.identifier())
+                            self.snowflake_identifier(tag.tag_identifier())
                         )
                     )
                     for tag in table.column_tags[col.name]
                 ]
             )
             if col.name in table.column_tags
+            and not self.config.extract_tags_as_structured_properties
             else None
         ),
     )
@@ -985,8 +1063,17 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 )
             ),
             tags=(
-                [self.snowflake_identifier(tag.identifier()) for tag in database.tags]
+                [
+                    self.snowflake_identifier(tag.tag_identifier())
+                    for tag in database.tags
+                ]
                 if database.tags
+                and not self.config.extract_tags_as_structured_properties
+                else None
+            ),
+            structured_properties=(
+                self._format_tags_as_structured_properties(database.tags)
+                if database.tags and self.config.extract_tags_as_structured_properties
                 else None
             ),
         )
@@ -1038,8 +1125,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 else None
             ),
             tags=(
-                [self.snowflake_identifier(tag.identifier()) for tag in schema.tags]
-                if schema.tags
+                [self.snowflake_identifier(tag.tag_identifier()) for tag in schema.tags]
+                if schema.tags and not self.config.extract_tags_as_structured_properties
+                else None
+            ),
+            structured_properties=(
+                self._format_tags_as_structured_properties(schema.tags)
+                if schema.tags and self.config.extract_tags_as_structured_properties
                 else None
             ),
         )
datahub/ingestion/source/snowflake/snowflake_tag.py

@@ -165,10 +165,20 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):

         allowed_tags = []
         for tag in tags:
-            tag_identifier = tag.identifier()
-            self.report.report_entity_scanned(tag_identifier, "tag")
-            if not self.config.tag_pattern.allowed(tag_identifier):
-                self.report.report_dropped(tag_identifier)
+            identifier = (
+                tag._id_prefix_as_str()
+                if self.config.extract_tags_as_structured_properties
+                else tag.tag_identifier()
+            )
+            self.report.report_entity_scanned(identifier, "tag")
+
+            pattern = (
+                self.config.structured_property_pattern
+                if self.config.extract_tags_as_structured_properties
+                else self.config.tag_pattern
+            )
+            if not pattern.allowed(identifier):
+                self.report.report_dropped(identifier)
             else:
                 allowed_tags.append(tag)
         return allowed_tags
datahub/ingestion/source/snowflake/snowflake_v2.py

@@ -23,7 +23,6 @@ from datahub.ingestion.api.incremental_properties_helper import (
 from datahub.ingestion.api.source import (
     CapabilityReport,
     MetadataWorkUnitProcessor,
-    Source,
     SourceCapability,
     SourceReport,
     TestableSource,
@@ -251,11 +250,6 @@ class SnowflakeV2Source(

         self.add_config_to_report()

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
-        config = SnowflakeV2Config.parse_obj(config_dict)
-        return cls(ctx, config)
-
     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
         test_report = TestConnectionReport()
datahub/ingestion/source/sql/sql_types.py

@@ -93,7 +93,7 @@ POSTGRES_TYPES_MAP: Dict[str, Any] = {
     "regtype": None,
     "regrole": None,
     "regnamespace": None,
-    "super": None,
+    "super": NullType,
     "uuid": StringType,
     "pg_lsn": None,
     "tsvector": None,  # text search vector
datahub/ingestion/source/sql/sql_utils.py

@@ -20,6 +20,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage
 from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
 from datahub.metadata.schema_classes import DataPlatformInstanceClass
+from datahub.metadata.urns import StructuredPropertyUrn
 from datahub.utilities.registries.domain_registry import DomainRegistry
 from datahub.utilities.urns.dataset_urn import DatasetUrn

@@ -75,6 +76,7 @@ def gen_schema_container(
     created: Optional[int] = None,
     last_modified: Optional[int] = None,
     extra_properties: Optional[Dict[str, str]] = None,
+    structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None,
 ) -> Iterable[MetadataWorkUnit]:
     domain_urn: Optional[str] = None
     if domain_registry:
@@ -99,6 +101,7 @@ def gen_schema_container(
         owner_urn=owner_urn,
         qualified_name=qualified_name,
         extra_properties=extra_properties,
+        structured_properties=structured_properties,
     )

@@ -133,6 +136,7 @@ def gen_database_container(
     created: Optional[int] = None,
     last_modified: Optional[int] = None,
     extra_properties: Optional[Dict[str, str]] = None,
+    structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None,
 ) -> Iterable[MetadataWorkUnit]:
     domain_urn: Optional[str] = None
     if domain_registry:
@@ -154,6 +158,7 @@ def gen_database_container(
         owner_urn=owner_urn,
         qualified_name=qualified_name,
         extra_properties=extra_properties,
+        structured_properties=structured_properties,
     )

datahub/ingestion/source/superset.py

@@ -33,7 +33,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.sql.sql_types import resolve_sql_type
 from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
@@ -265,11 +265,6 @@ class SupersetSource(StatefulIngestionSourceBase):
         # TODO(Gabe): how should we message about this error?
         return requests_session

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = SupersetConfig.parse_obj(config_dict)
-        return cls(ctx, config)
-
     def paginate_entity_api_results(self, entity_type, page_size=100):
         current_page = 0
         total_items = page_size
datahub/ingestion/source/tableau/tableau.py

@@ -71,7 +71,6 @@ from datahub.ingestion.api.decorators import (
 from datahub.ingestion.api.source import (
     CapabilityReport,
     MetadataWorkUnitProcessor,
-    Source,
     StructuredLogLevel,
     TestableSource,
     TestConnectionReport,
@@ -804,11 +803,6 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
     def get_report(self) -> TableauSourceReport:
         return self.report

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = TableauConfig.parse_obj(config_dict)
-        return cls(config, ctx)
-
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
             *super().get_workunit_processors(),