acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/METADATA +2665 -2664
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/RECORD +68 -67
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +3 -3
- datahub/api/entities/external/external_tag.py +6 -4
- datahub/api/entities/external/lake_formation_external_entites.py +50 -49
- datahub/api/entities/external/restricted_text.py +105 -180
- datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/quickstart_versioning.py +1 -1
- datahub/cli/specific/assertions_cli.py +37 -2
- datahub/cli/specific/datacontract_cli.py +54 -4
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +1 -1
- datahub/ingestion/api/report.py +21 -2
- datahub/ingestion/api/source.py +81 -7
- datahub/ingestion/autogenerated/capability_summary.json +47 -19
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +9 -0
- datahub/ingestion/source/aws/glue.py +18 -2
- datahub/ingestion/source/aws/tag_entities.py +4 -4
- datahub/ingestion/source/data_lake_common/path_spec.py +6 -3
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +6 -3
- datahub/ingestion/source/delta_lake/source.py +8 -1
- datahub/ingestion/source/dremio/dremio_source.py +19 -2
- datahub/ingestion/source/fivetran/fivetran.py +9 -3
- datahub/ingestion/source/fivetran/fivetran_log_api.py +4 -3
- datahub/ingestion/source/ge_data_profiler.py +8 -0
- datahub/ingestion/source/grafana/models.py +6 -0
- datahub/ingestion/source/hex/hex.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +4 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
- datahub/ingestion/source/powerbi/powerbi.py +4 -1
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/salesforce.py +8 -0
- datahub/ingestion/source/slack/slack.py +7 -14
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +4 -4
- datahub/ingestion/source/sql/hive_metastore.py +8 -0
- datahub/ingestion/source/sql/teradata.py +8 -1
- datahub/ingestion/source/sql/trino.py +9 -0
- datahub/ingestion/source/tableau/tableau.py +1 -1
- datahub/ingestion/source/unity/config.py +36 -1
- datahub/ingestion/source/unity/proxy.py +332 -46
- datahub/ingestion/source/unity/proxy_types.py +12 -2
- datahub/ingestion/source/unity/source.py +91 -34
- datahub/ingestion/source/unity/tag_entities.py +5 -5
- datahub/ingestion/source/usage/starburst_trino_usage.py +2 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/metadata/_internal_schema_classes.py +513 -513
- datahub/metadata/_urns/urn_defs.py +1684 -1684
- datahub/metadata/schema.avsc +16745 -16348
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/sdk/entity_client.py +22 -7
- datahub/sdk/search_client.py +3 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataset.py +37 -59
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/server_config_util.py +2 -1
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/data_lake_common/path_spec.py

@@ -62,7 +62,6 @@ class SortKey(ConfigModel):
 
     date_format: Optional[str] = Field(
         default=None,
-        type=str,
         description="The date format to use when sorting. This is used to parse the date from the key. The format should follow the java [SimpleDateFormat](https://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html) format.",
     )
 
@@ -260,7 +259,7 @@ class PathSpec(ConfigModel):
     ) -> Union[None, parse.Result, parse.Match]:
         return self.compiled_folder_include.parse(path)
 
-    @pydantic.root_validator()
+    @pydantic.root_validator(skip_on_failure=True)
     def validate_no_double_stars(cls, values: Dict) -> Dict:
         if "include" not in values:
             return values
@@ -456,7 +455,11 @@ class PathSpec(ConfigModel):
             partition = partition.rsplit("/", 1)[0]
             for partition_key in partition.split("/"):
                 if partition_key.find("=") != -1:
-
+                    key_value = partition_key.split(
+                        "=", 1
+                    )  # Split into at most 2 parts
+                    if len(key_value) == 2:
+                        partition_keys.append((key_value[0], key_value[1]))
                 else:
                     partition_split = partition.rsplit("/", 1)
                     if len(partition_split) == 1:
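A quick editor's sketch (with a made-up partition string, not from the package) of why the new code splits on the first `=` only:

```python
# A partition value that itself contains "=" breaks a split-on-every-"=" approach.
partition_key = "run=2024-01-01T00=30"

print(partition_key.split("="))     # ['run', '2024-01-01T00', '30'] -> not a (key, value) pair
print(partition_key.split("=", 1))  # ['run', '2024-01-01T00=30']    -> clean (key, value) pair
```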
datahub/ingestion/source/datahub/datahub_source.py

@@ -19,6 +19,7 @@ from datahub.ingestion.api.source_helpers import (
     auto_workunit_reporter,
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.datahub.config import DataHubSourceConfig
 from datahub.ingestion.source.datahub.datahub_api_reader import DataHubApiReader
 from datahub.ingestion.source.datahub.datahub_database_reader import (
@@ -39,7 +40,13 @@ logger = logging.getLogger(__name__)
 @platform_name("DataHub")
 @config_class(DataHubSourceConfig)
 @support_status(SupportStatus.TESTING)
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+    ],
+)
 class DataHubSource(StatefulIngestionSourceBase):
     platform: str = "datahub"
 
datahub/ingestion/source/dbt/dbt_cloud.py

@@ -370,9 +370,12 @@ class DBTCloudSource(DBTSourceBase, TestableSource):
         name = node["alias"]
 
         comment = node.get("comment", "")
-
-
-
+
+        # In dbt sources, there are two types of descriptions:
+        # - description: table-level description (specific to the source table)
+        # - sourceDescription: schema-level description (describes the overall source schema)
+        # The table-level description should take precedence since it's more specific.
+        description = node["description"] or node.get("sourceDescription", "")
 
         if node["resourceType"] == "model":
             materialization = node["materializedType"]
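A small illustration of the precedence rule described in the new comment, using a made-up node dict:

```python
# Falls back to the schema-level sourceDescription only when the table-level description is empty.
node = {"description": "", "sourceDescription": "Raw payments schema"}
description = node["description"] or node.get("sourceDescription", "")
print(description)  # "Raw payments schema"
```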
datahub/ingestion/source/delta_lake/source.py

@@ -29,6 +29,7 @@ from datahub.ingestion.source.aws.s3_util import (
     get_key_prefix,
     strip_s3_prefix,
 )
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
 from datahub.ingestion.source.delta_lake.config import DeltaLakeSourceConfig
 from datahub.ingestion.source.delta_lake.delta_lake_utils import (
@@ -85,7 +86,13 @@ OPERATION_STATEMENT_TYPES = {
 @config_class(DeltaLakeSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled")
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.FOLDER,
+    ],
+)
 class DeltaLakeSource(StatefulIngestionSourceBase):
     """
     This plugin extracts:
datahub/ingestion/source/dremio/dremio_source.py

@@ -22,6 +22,7 @@ from datahub.ingestion.api.source import (
     SourceReport,
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.dremio.dremio_api import (
     DremioAPIOperations,
     DremioEdition,
@@ -86,11 +87,27 @@ class DremioSourceMapEntry:
 @platform_name("Dremio")
 @config_class(DremioSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+)
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
-@capability(
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.TABLE,
+    ],
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Extract column-level lineage",
+    subtype_modifier=[
+        SourceCapabilityModifier.TABLE,
+    ],
+)
 @capability(SourceCapability.OWNERSHIP, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.USAGE_STATS, "Enabled by default to get usage stats")
datahub/ingestion/source/fivetran/fivetran.py

@@ -16,7 +16,11 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import
+from datahub.ingestion.api.source import (
+    MetadataWorkUnitProcessor,
+    SourceReport,
+    StructuredLogCategory,
+)
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.fivetran.config import (
     KNOWN_DATA_PLATFORM_MAPPING,
@@ -96,8 +100,10 @@ class FivetranSource(StatefulIngestionSourceBase):
             self.report.info(
                 title="Guessing source platform for lineage",
                 message="We encountered a connector type that we don't fully support yet. "
-                "We will attempt to guess the platform based on the connector type."
-
+                "We will attempt to guess the platform based on the connector type. "
+                "Note that we use connector_id as the key not connector_name which you may see in the UI of Fivetran. ",
+                context=f"connector_name: {connector.connector_name} (connector_id: {connector.connector_id}, connector_type: {connector.connector_type})",
+                log_category=StructuredLogCategory.LINEAGE,
             )
             source_details.platform = connector.connector_type
 
datahub/ingestion/source/fivetran/fivetran_log_api.py

@@ -69,9 +69,10 @@ class FivetranLogAPI:
             fivetran_log_query.set_schema(bigquery_destination_config.dataset)
 
             # The "database" should be the BigQuery project name.
-
-
-
+            result = engine.execute("SELECT @@project_id").fetchone()
+            if result is None:
+                raise ValueError("Failed to retrieve BigQuery project ID")
+            fivetran_log_database = result[0]
         else:
             raise ConfigurationError(
                 f"Destination platform '{destination_platform}' is not yet supported."
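Why the new `None` check matters: `fetchone()` returns `None` on an empty result set, so indexing the row unconditionally can raise a `TypeError`. A minimal sketch with an in-memory SQLite engine (illustrative, not the Fivetran code path):

```python
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
with engine.connect() as conn:
    # No rows match, so fetchone() yields None rather than a row object.
    row = conn.execute(sa.text("SELECT 1 WHERE 1 = 0")).fetchone()
    print(row)  # None -> the guard above turns this into a clear ValueError instead of failing on row[0]
```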
datahub/ingestion/source/ge_data_profiler.py

@@ -216,6 +216,14 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> int:
                 )
             ).scalar()
         )
+    elif self.engine.dialect.name.lower() == DATABRICKS:
+        return convert_to_json_serializable(
+            self.engine.execute(
+                sa.select(sa.func.approx_count_distinct(sa.column(column))).select_from(
+                    self._table
+                )
+            ).scalar()
+        )
     return convert_to_json_serializable(
         self.engine.execute(
             sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(
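The Databricks branch above swaps the exact distinct count for an approximate one. A hedged SQLAlchemy sketch (assuming SQLAlchemy 1.4+; table and column names are illustrative) showing the SQL each expression compiles to:

```python
import sqlalchemy as sa

events = sa.table("events", sa.column("user_id"))

exact = sa.select(sa.func.count(sa.func.distinct(sa.column("user_id")))).select_from(events)
approx = sa.select(sa.func.approx_count_distinct(sa.column("user_id"))).select_from(events)

print(exact)   # roughly: SELECT count(distinct(user_id)) AS count_1 FROM events
print(approx)  # roughly: SELECT approx_count_distinct(user_id) AS approx_count_distinct_1 FROM events
```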
datahub/ingestion/source/grafana/models.py

@@ -12,6 +12,7 @@ from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, Field
 
+from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
 from datahub.emitter.mcp_builder import ContainerKey
 
 # Grafana-specific type definitions for better type safety
@@ -106,6 +107,11 @@ class Folder(BaseModel):
     title: str
     description: Optional[str] = ""
 
+    if PYDANTIC_VERSION_2:
+        from pydantic import ConfigDict
+
+        model_config = ConfigDict(coerce_numbers_to_str=True)  # type: ignore
+
 
 class FolderKey(ContainerKey):
     """Key for identifying a Grafana folder."""
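What `coerce_numbers_to_str` buys on Pydantic 2.x (2.1 or newer). A minimal sketch with an illustrative string field:

```python
from pydantic import BaseModel, ConfigDict

class Folder(BaseModel):
    model_config = ConfigDict(coerce_numbers_to_str=True)
    id: str
    title: str

# Numeric ids coming back from an API are coerced instead of failing validation.
print(Folder(id=42, title="Ops").id)  # "42"
```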
datahub/ingestion/source/hex/hex.py

@@ -69,7 +69,7 @@ class HexSourceConfig(
     )
     include_components: bool = Field(
         default=True,
-
+        description="Include Hex Components in the ingestion",
     )
     page_size: int = Field(
         default=HEX_API_PAGE_SIZE_DEFAULT,
datahub/ingestion/source/iceberg/iceberg.py

@@ -524,11 +524,11 @@ class IcebergSource(StatefulIngestionSourceBase):
         custom_properties["format-version"] = str(table.metadata.format_version)
         custom_properties["partition-spec"] = str(self._get_partition_aspect(table))
         last_modified: Optional[int] = table.metadata.last_updated_ms
-        if table.current_snapshot():
-            custom_properties["snapshot-id"] = str(
-            custom_properties["manifest-list"] =
+        if current_snapshot := table.current_snapshot():
+            custom_properties["snapshot-id"] = str(current_snapshot.snapshot_id)
+            custom_properties["manifest-list"] = current_snapshot.manifest_list
             if not last_modified:
-                last_modified = int(
+                last_modified = int(current_snapshot.timestamp_ms)
         if "created-at" in custom_properties:
             try:
                 dt = dateutil_parser.isoparse(custom_properties["created-at"])
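The walrus-operator rewrite above binds the snapshot once and only dereferences it when it is not `None`. A generic sketch of the pattern (names and values are made up):

```python
from typing import Optional

def snapshot_id(lookup) -> Optional[str]:
    # Assign and test in a single expression; the body runs only for a non-None result.
    if current := lookup():
        return str(current["snapshot_id"])
    return None

print(snapshot_id(lambda: {"snapshot_id": 42}))  # "42"
print(snapshot_id(lambda: None))                 # None
```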
datahub/ingestion/source/looker/looker_liquid_tag.py

@@ -1,5 +1,5 @@
 from functools import lru_cache
-from typing import ClassVar, Optional, TextIO
+from typing import ClassVar, Optional, TextIO, Type
 
 from liquid import Environment
 from liquid.ast import Node
@@ -20,16 +20,27 @@ class CustomTagException(Exception):
 class ConditionNode(Node):
     def __init__(self, tok: Token, sql_or_lookml_reference: str, filter_name: str):
         self.tok = tok
-
         self.sql_or_lookml_reference = sql_or_lookml_reference
-
         self.filter_name = filter_name
 
     def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
         # This implementation will make sure that sql parse work correctly if looker condition tag
         # is used in lookml sql field
         buffer.write(f"{self.sql_or_lookml_reference}='dummy_value'")
+        return True
 
+
+class IncrementConditionNode(Node):
+    def __init__(self, tok: Token, sql_or_lookml_reference: str):
+        self.tok = tok
+        self.sql_or_lookml_reference = sql_or_lookml_reference
+
+    def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
+        # For incrementcondition, we need to generate a condition that would be used
+        # in incremental PDT updates. This typically involves date/time comparisons.
+        # We'll render it as a date comparison with a placeholder value
+        # See details in Looker documentation for incrementcondition tag -> cloud.google.com/looker/docs/reference/param-view-increment-key
+        buffer.write(f"{self.sql_or_lookml_reference} > '2023-01-01'")
         return True
 
 
@@ -44,7 +55,6 @@ class ConditionTag(Tag):
     This class render the below tag as order.region='ap-south-1' if order_region is provided in config.liquid_variables
     as order_region: 'ap-south-1'
     {% condition order_region %} order.region {% endcondition %}
-
     """
 
     TAG_START: ClassVar[str] = "condition"
@@ -79,7 +89,48 @@ class ConditionTag(Tag):
         )
 
 
-
+class IncrementConditionTag(Tag):
+    """
+    IncrementConditionTag is the equivalent implementation of looker's custom liquid tag "incrementcondition".
+    Refer doc: https://cloud.google.com/looker/docs/incremental-pdts#using_the_incrementcondition_tag
+
+    This tag is used for incremental PDTs to determine which records should be updated.
+    It typically works with date/time fields to filter data that has changed since the last update.
+
+    Example usage in Looker:
+    {% incrementcondition created_at %} order.created_at {% endincrementcondition %}
+
+    This would generate SQL like: order.created_at > '2023-01-01 00:00:00'
+    """
+
+    TAG_START: ClassVar[str] = "incrementcondition"
+    TAG_END: ClassVar[str] = "endincrementcondition"
+    name: str = "incrementcondition"
+
+    def __init__(self, env: Environment):
+        super().__init__(env)
+        self.parser = get_parser(self.env)
+
+    def parse(self, stream: TokenStream) -> Node:
+        expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_START)
+
+        start_token = stream.current
+
+        stream.next_token()
+        expect(stream, TOKEN_LITERAL)
+        sql_or_lookml_reference: str = stream.current.value.strip()
+
+        stream.next_token()
+        expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_END)
+
+        return IncrementConditionNode(
+            tok=start_token,
+            sql_or_lookml_reference=sql_or_lookml_reference,
+        )
+
+
+# Updated custom_tags list to include both tags
+custom_tags: list[Type[Tag]] = [ConditionTag, IncrementConditionTag]
 
 
 @string_filter
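A hedged usage sketch for the new tag, assuming python-liquid's `Environment.add_tag` registration API; the import path comes from the hunk above and the SQL string is illustrative:

```python
from liquid import Environment

from datahub.ingestion.source.looker.looker_liquid_tag import IncrementConditionTag

env = Environment()
env.add_tag(IncrementConditionTag)  # register the custom tag with this environment

rendered = env.from_string(
    "SELECT * FROM orders WHERE "
    "{% incrementcondition created_at %} order.created_at {% endincrementcondition %}"
).render()
print(rendered)  # SELECT * FROM orders WHERE order.created_at > '2023-01-01'
```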
datahub/ingestion/source/mock_data/datahub_mock_data.py

@@ -13,7 +13,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.source import Source, SourceReport, StructuredLogCategory
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
 from datahub.ingestion.source.mock_data.datahub_mock_data_report import (
@@ -35,6 +35,8 @@ from datahub.utilities.str_enum import StrEnum
 
 logger = logging.getLogger(__name__)
 
+PLATFORM_NAME = "fake"
+
 
 class SubTypePattern(StrEnum):
     ALTERNATING = "alternating"
@@ -137,6 +139,10 @@ class DataHubMockDataConfig(ConfigModel):
         default=0,
         description="Number of warnings to add in report for testing",
     )
+    num_info: int = Field(
+        default=0,
+        description="Number of info to add in report for testing",
+    )
 
     gen_1: LineageConfigGen1 = Field(
         default_factory=LineageConfigGen1,
@@ -144,7 +150,7 @@ class DataHubMockDataConfig(ConfigModel):
     )
 
 
-@platform_name(
+@platform_name(PLATFORM_NAME)
 @config_class(DataHubMockDataConfig)
 @support_status(SupportStatus.TESTING)
 class DataHubMockDataSource(Source):
@@ -159,6 +165,9 @@ class DataHubMockDataSource(Source):
         self.report = DataHubMockDataReport()
 
     def get_workunits(self) -> Iterable[MetadataWorkUnit]:
+        # We don't want any implicit aspects to be produced
+        # so we are not using get_workunits_internal
+
         if self.config.throw_uncaught_exceptions:
             raise Exception("This is a test exception")
 
@@ -176,10 +185,17 @@ class DataHubMockDataSource(Source):
                     message="This is test warning",
                     title="Test Warning",
                     context=f"This is test warning {i}",
+                    log_category=StructuredLogCategory.LINEAGE,
+                )
+
+        if self.config.num_info > 0:
+            for i in range(self.config.num_info):
+                self.report.info(
+                    message="This is test info",
+                    title="Test Info",
+                    context=f"This is test info {i}",
                 )
 
-        # We don't want any implicit aspects to be produced
-        # so we are not using get_workunits_internal
         if self.config.gen_1.enabled:
             for wu in self._data_gen_1():
                 if self.report.first_urn_seen is None:
@@ -309,7 +325,7 @@ class DataHubMockDataSource(Source):
             table_level, table_index, subtype_pattern, subtype_types, level_subtypes
         )
 
-        urn = make_dataset_urn(platform=
+        urn = make_dataset_urn(platform=PLATFORM_NAME, name=table_name)
         mcp = MetadataChangeProposalWrapper(
             entityUrn=urn,
             entityType="dataset",
@@ -433,7 +449,7 @@ class DataHubMockDataSource(Source):
 
     def _get_status_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
@@ -448,7 +464,7 @@ class DataHubMockDataSource(Source):
     ) -> MetadataWorkUnit:
         mcp = MetadataChangeProposalWrapper(
             entityUrn=make_dataset_urn(
-                platform=
+                platform=PLATFORM_NAME,
                 name=downstream_table,
             ),
             entityType="dataset",
@@ -456,7 +472,7 @@ class DataHubMockDataSource(Source):
             upstreams=[
                 UpstreamClass(
                     dataset=make_dataset_urn(
-                        platform=
+                        platform=PLATFORM_NAME,
                         name=upstream_table,
                     ),
                     type=DatasetLineageTypeClass.TRANSFORMED,
@@ -468,7 +484,7 @@ class DataHubMockDataSource(Source):
 
     def _get_profile_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
        )
         mcp = MetadataChangeProposalWrapper(
@@ -485,7 +501,7 @@ class DataHubMockDataSource(Source):
 
     def _get_usage_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
datahub/ingestion/source/powerbi/powerbi.py

@@ -1226,7 +1226,10 @@ class Mapper:
 @platform_name("PowerBI")
 @config_class(PowerBiDashboardSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+)
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
 @capability(SourceCapability.OWNERSHIP, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
datahub/ingestion/source/powerbi_report_server/report_server_domain.py

@@ -27,10 +27,8 @@ class CatalogItem(BaseModel):
     is_favorite: bool = Field(alias="IsFavorite")
     user_info: Any = Field(None, alias="UserInfo")
     display_name: Optional[str] = Field(None, alias="DisplayName")
-    has_data_sources: bool = Field(
-    data_sources: Optional[List["DataSource"]] = Field(
-        default_factory=list, alias="DataSources"
-    )
+    has_data_sources: bool = Field(False, alias="HasDataSources")
+    data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")
 
     @validator("display_name", always=True)
     def validate_diplay_name(cls, value, values):
datahub/ingestion/source/redshift/datashares.py

@@ -26,7 +26,7 @@ from datahub.utilities.search_utils import LogicalOperator
 
 class OutboundSharePlatformResource(BaseModel):
     namespace: str
-    platform_instance: Optional[str]
+    platform_instance: Optional[str] = None
     env: str
     source_database: str
     share_name: str
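The `= None` default here (and in the Snowflake lineage models further down) is the usual Pydantic v2 migration pattern: v1 treated `Optional[...]` fields as implicitly optional, while v2 requires an explicit default. Minimal sketch with an illustrative model:

```python
from typing import Optional
from pydantic import BaseModel

class OutboundShare(BaseModel):
    namespace: str
    platform_instance: Optional[str] = None  # without the explicit default, v2 treats this as required

print(OutboundShare(namespace="acme").platform_instance)  # None
```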
datahub/ingestion/source/redshift/redshift.py

@@ -132,6 +132,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Enabled by default",
     subtype_modifier=[
         SourceCapabilityModifier.DATABASE,
+        SourceCapabilityModifier.SCHEMA,
     ],
 )
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
datahub/ingestion/source/salesforce.py

@@ -549,6 +549,14 @@ class SalesforceApi:
     capability_name=SourceCapability.TAGS,
     description="Enabled by default",
 )
+@capability(
+    capability_name=SourceCapability.LINEAGE_COARSE,
+    description="Extract table-level lineage for Salesforce objects",
+    subtype_modifier=[
+        SourceCapabilityModifier.SALESFORCE_CUSTOM_OBJECT,
+        SourceCapabilityModifier.SALESFORCE_STANDARD_OBJECT,
+    ],
+)
 class SalesforceSource(StatefulIngestionSourceBase):
     def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
         super().__init__(config, ctx)
datahub/ingestion/source/slack/slack.py

@@ -203,38 +203,31 @@ class SlackSourceConfig(
         description="Bot token for the Slack workspace. Needs `users:read`, `users:read.email`, `users.profile:read`, and `team:read` scopes.",
     )
     enrich_user_metadata: bool = Field(
-
-        default=True,
+        True,
         description="When enabled, will enrich provisioned DataHub users' metadata with information from Slack.",
     )
     ingest_users: bool = Field(
-
-        default=True,
+        True,
         description="Whether to ingest users. When set to true, will ingest all users in the Slack workspace (as platform resources) to simplify user enrichment after they are provisioned on DataHub.",
     )
     api_requests_per_min: int = Field(
-
-        default=10,
+        10,
         description="Number of API requests per minute. Low-level config. Do not tweak unless you are facing any issues.",
     )
     ingest_public_channels: bool = Field(
-
-        default=False,
+        False,
         description="Whether to ingest public channels. If set to true needs `channels:read` scope.",
     )
     channels_iteration_limit: int = Field(
-
-        default=200,
+        200,
         description="Limit the number of channels to be ingested in a iteration. Low-level config. Do not tweak unless you are facing any issues.",
     )
     channel_min_members: int = Field(
-
-        default=2,
+        2,
         description="Ingest channels with at least this many members.",
     )
     should_ingest_archived_channels: bool = Field(
-
-        default=False,
+        False,
         description="Whether to ingest archived channels.",
     )
 
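The Slack config change is cosmetic: the first positional argument to `Field` is the default value, so `Field(True, ...)` is equivalent to `Field(default=True, ...)`. Sketch with an illustrative model:

```python
from pydantic import BaseModel, Field

class Flags(BaseModel):
    a: bool = Field(True, description="positional default")
    b: bool = Field(default=True, description="keyword default")

print(Flags().a, Flags().b)  # True True
```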
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

@@ -72,7 +72,7 @@ class ColumnUpstreamJob(BaseModel):
 
 
 class ColumnUpstreamLineage(BaseModel):
-    column_name: Optional[str]
+    column_name: Optional[str] = None
     upstreams: List[ColumnUpstreamJob] = Field(default_factory=list)
 
 
@@ -91,9 +91,9 @@ class Query(BaseModel):
 class UpstreamLineageEdge(BaseModel):
     DOWNSTREAM_TABLE_NAME: str
     DOWNSTREAM_TABLE_DOMAIN: str
-    UPSTREAM_TABLES: Optional[List[UpstreamTableNode]]
-    UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]]
-    QUERIES: Optional[List[Query]]
+    UPSTREAM_TABLES: Optional[List[UpstreamTableNode]] = None
+    UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]] = None
+    QUERIES: Optional[List[Query]] = None
 
     _json_upstream_tables = pydantic_parse_json("UPSTREAM_TABLES")
     _json_upstream_columns = pydantic_parse_json("UPSTREAM_COLUMNS")
datahub/ingestion/source/sql/hive_metastore.py

@@ -27,6 +27,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import (
     DatasetContainerSubTypes,
     DatasetSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
@@ -168,6 +169,13 @@ class HiveMetastore(BasicSQLAlchemyConfig):
 @capability(
     SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
 )
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.CATALOG,
+    ],
+)
 class HiveMetastoreSource(SQLAlchemySource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sql/teradata.py

@@ -42,6 +42,7 @@ from datahub.ingestion.api.decorators import (
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.graph.client import DataHubGraph
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.sql.sql_common import register_custom_type
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
 from datahub.ingestion.source.sql.sql_report import SQLSourceReport
@@ -539,7 +540,13 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig):
 @config_class(TeradataConfig)
 @support_status(SupportStatus.TESTING)
 @capability(SourceCapability.DOMAINS, "Enabled by default")
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+    ],
+)
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(
     SourceCapability.DELETION_DETECTION,
datahub/ingestion/source/sql/trino.py

@@ -36,6 +36,7 @@ from datahub.ingestion.api.decorators import (
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.extractor import schema_util
 from datahub.ingestion.source.common.data_reader import DataReader
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
     SqlWorkUnit,
@@ -249,6 +250,14 @@ class TrinoConfig(BasicSQLAlchemyConfig):
 @support_status(SupportStatus.CERTIFIED)
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Extract table-level lineage",
+    subtype_modifier=[
+        SourceCapabilityModifier.TABLE,
+        SourceCapabilityModifier.VIEW,
+    ],
+)
 class TrinoSource(SQLAlchemySource):
     """
 
datahub/ingestion/source/tableau/tableau.py

@@ -1184,7 +1184,7 @@ class TableauSiteSource:
             self.report.warning(
                 title="Incomplete project hierarchy",
                 message="Project details missing. Child projects will be ingested without reference to their parent project. We generally need Site Administrator Explorer permissions to extract the complete project hierarchy.",
-                context=f"Missing {project.parent_id}, referenced by {project.id} {project.
+                context=f"Missing {project.parent_id}, referenced by {project.id} {project.name}",
             )
             project.parent_id = None
 