acryl-datahub 1.2.0.3rc1__py3-none-any.whl → 1.2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4.dist-info}/METADATA +2609 -2608
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4.dist-info}/RECORD +74 -73
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +3 -3
- datahub/api/entities/external/external_tag.py +6 -4
- datahub/api/entities/external/lake_formation_external_entites.py +50 -49
- datahub/api/entities/external/restricted_text.py +105 -180
- datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/quickstart_versioning.py +1 -1
- datahub/cli/specific/assertions_cli.py +37 -2
- datahub/cli/specific/datacontract_cli.py +54 -4
- datahub/emitter/rest_emitter.py +18 -5
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +1 -1
- datahub/ingestion/api/report.py +21 -2
- datahub/ingestion/api/source.py +81 -7
- datahub/ingestion/autogenerated/capability_summary.json +47 -19
- datahub/ingestion/graph/client.py +19 -3
- datahub/ingestion/sink/datahub_rest.py +2 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +9 -0
- datahub/ingestion/source/aws/glue.py +18 -2
- datahub/ingestion/source/aws/tag_entities.py +4 -4
- datahub/ingestion/source/data_lake_common/path_spec.py +6 -3
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +6 -3
- datahub/ingestion/source/dbt/dbt_common.py +10 -0
- datahub/ingestion/source/delta_lake/source.py +8 -1
- datahub/ingestion/source/dremio/dremio_source.py +19 -2
- datahub/ingestion/source/fivetran/fivetran.py +9 -3
- datahub/ingestion/source/fivetran/fivetran_log_api.py +4 -3
- datahub/ingestion/source/ge_data_profiler.py +8 -0
- datahub/ingestion/source/grafana/models.py +6 -0
- datahub/ingestion/source/hex/hex.py +1 -1
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +4 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
- datahub/ingestion/source/powerbi/powerbi.py +4 -1
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/salesforce.py +8 -0
- datahub/ingestion/source/slack/slack.py +7 -14
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +4 -4
- datahub/ingestion/source/sql/athena_properties_extractor.py +2 -2
- datahub/ingestion/source/sql/hive_metastore.py +8 -0
- datahub/ingestion/source/sql/teradata.py +8 -1
- datahub/ingestion/source/sql/trino.py +9 -0
- datahub/ingestion/source/tableau/tableau.py +1 -1
- datahub/ingestion/source/unity/config.py +36 -1
- datahub/ingestion/source/unity/proxy.py +332 -46
- datahub/ingestion/source/unity/proxy_types.py +12 -2
- datahub/ingestion/source/unity/source.py +91 -34
- datahub/ingestion/source/unity/tag_entities.py +5 -5
- datahub/ingestion/source/usage/starburst_trino_usage.py +2 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/metadata/_internal_schema_classes.py +513 -513
- datahub/metadata/_urns/urn_defs.py +1684 -1684
- datahub/metadata/schema.avsc +16745 -16348
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/sdk/entity_client.py +22 -7
- datahub/sdk/search_client.py +3 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataset.py +37 -59
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/server_config_util.py +2 -1
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4.dist-info}/top_level.txt +0 -0
datahub/cli/quickstart_versioning.py
CHANGED

@@ -44,7 +44,7 @@ def get_minimum_supported_version_message(version: str) -> str:
 class QuickstartExecutionPlan(BaseModel):
     composefile_git_ref: str
     docker_tag: str
-    mysql_tag: Optional[str]
+    mysql_tag: Optional[str] = None
 
 
 def _is_it_a_version(version: str) -> bool:
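Note on the `mysql_tag` change: giving the `Optional[str]` field an explicit `= None` default makes it genuinely optional under Pydantic v2 semantics, where an un-defaulted `Optional` field is still required. A minimal sketch (the model below is illustrative, not the package's class):

```python
from typing import Optional
from pydantic import BaseModel

class Plan(BaseModel):
    composefile_git_ref: str
    docker_tag: str
    mysql_tag: Optional[str] = None  # may be omitted entirely, not just set to null

Plan(composefile_git_ref="master", docker_tag="head")  # validates without mysql_tag
```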
datahub/cli/specific/assertions_cli.py
CHANGED

@@ -1,3 +1,8 @@
+"""
+DEPRECATED: This assertions CLI is no longer supported and will be removed in a future version.
+Please use alternative methods for managing assertions in DataHub.
+"""
+
 import logging
 import os
 from pathlib import Path
@@ -26,7 +31,18 @@ REPORT_FILE_NAME = "compile_report.json"
 
 @click.group(cls=DefaultGroup, default="upsert")
 def assertions() -> None:
-    """A group of commands to interact with the Assertion entity in DataHub."""
+    """A group of commands to interact with the Assertion entity in DataHub.
+
+    ⚠️ DEPRECATED: This assertions CLI is no longer supported and will be removed
+    in a future version. Please use alternative methods for managing assertions in DataHub.
+    """
+    click.secho(
+        "⚠️ WARNING: The assertions CLI is deprecated and no longer supported. "
+        "It may be removed in a future version. Please use alternative methods for managing assertions in DataHub.",
+        fg="yellow",
+        bold=True,
+        err=True,
+    )
     pass
 
 
@@ -34,7 +50,16 @@ def assertions() -> None:
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
 @upgrade.check_upgrade
 def upsert(file: str) -> None:
-    """Upsert (create or update) a set of assertions in DataHub."""
+    """Upsert (create or update) a set of assertions in DataHub.
+
+    ⚠️ DEPRECATED: This command is deprecated and no longer supported.
+    """
+    click.secho(
+        "⚠️ WARNING: The 'upsert' command is deprecated and no longer supported.",
+        fg="yellow",
+        bold=True,
+        err=True,
+    )
 
     assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)
 
@@ -78,7 +103,15 @@ def compile(
     In future, we may introduce separate command to automatically apply these compiled changes
     in assertion platform. Currently, generated result artifacts are stored in target folder
     unless another folder is specified using option `--output-to <folder>`.
+
+    ⚠️ DEPRECATED: This command is deprecated and no longer supported.
     """
+    click.secho(
+        "⚠️ WARNING: The 'compile' command is deprecated and no longer supported.",
+        fg="yellow",
+        bold=True,
+        err=True,
+    )
 
     if platform not in ASSERTION_PLATFORMS:
         click.secho(
@@ -146,3 +179,5 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
 # Later:
 # 3. execute compiled assertions on assertion platform (Later, requires connection details to platform),
 # 4. cleanup assertions from assertion platform (generate artifacts. optionally execute)
+#
+# NOTE: This entire assertions CLI is deprecated and these TODOs will not be implemented.
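Note on the pattern above: the warning is emitted from the click group's callback, so it prints before whichever subcommand runs. A toy, self-contained sketch of the same idea (names are illustrative, not the package's commands):

```python
import click

@click.group()
def cli() -> None:
    # A group callback runs before the selected subcommand, so the banner always appears first.
    click.secho("WARNING: this CLI is deprecated.", fg="yellow", bold=True, err=True)

@cli.command()
def upsert() -> None:
    click.echo("running the actual command")

if __name__ == "__main__":
    cli()  # `python toy.py upsert` prints the banner to stderr, then runs upsert
```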
datahub/cli/specific/datacontract_cli.py
CHANGED

@@ -1,4 +1,5 @@
 import logging
+import warnings
 from typing import Optional
 
 import click
@@ -14,15 +15,52 @@ logger = logging.getLogger(__name__)
 
 @click.group(cls=DefaultGroup, default="upsert")
 def datacontract() -> None:
-    """
-
+    """
+    A group of commands to interact with the DataContract entity in DataHub.
+
+    WARNING: This CLI is DEPRECATED and no longer supported.
+    Please migrate to alternative data contract solutions.
+    """
+    # Issue deprecation warning
+    warnings.warn(
+        "The datacontract CLI is deprecated and no longer supported. "
+        "Please migrate to alternative data contract solutions.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+
+    # Log deprecation message for runtime visibility
+    logger.warning(
+        "DEPRECATED: The datacontract CLI is no longer supported and will be removed in a future version. "
+        "Please migrate to alternative data contract solutions."
+    )
+
+    # Display deprecation message to user
+    click.secho(
+        "⚠️ WARNING: This datacontract CLI is DEPRECATED and no longer supported.",
+        fg="yellow",
+        bold=True,
+    )
+    click.secho("Please migrate to alternative data contract solutions.", fg="yellow")
 
 
 @datacontract.command()
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
 @upgrade.check_upgrade
 def upsert(file: str) -> None:
-    """
+    """
+    Upsert (create or update) a Data Contract in DataHub.
+
+    WARNING: This command is DEPRECATED and no longer supported.
+    """
+
+    click.secho(
+        "⚠️ WARNING: The 'upsert' command is deprecated and no longer supported.",
+        fg="yellow",
+        bold=True,
+    )
+
+    logger.warning("DEPRECATED: datacontract upsert command is no longer supported")
 
     data_contract: DataContract = DataContract.from_yaml(file)
     urn = data_contract.urn
@@ -59,7 +97,19 @@ def upsert(file: str) -> None:
 @click.option("--hard/--soft", required=False, is_flag=True, default=False)
 @upgrade.check_upgrade
 def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
-    """
+    """
+    Delete a Data Contract in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata.
+
+    WARNING: This command is DEPRECATED and no longer supported.
+    """
+
+    click.secho(
+        "⚠️ WARNING: The 'delete' command is deprecated and no longer supported.",
+        fg="yellow",
+        bold=True,
+    )
+
+    logger.warning("DEPRECATED: datacontract delete command is no longer supported")
 
     if not urn:
         if not file:
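Note on the triple notification above: `warnings.warn(..., DeprecationWarning, stacklevel=2)` reaches programmatic callers and attributes the warning to their call site, while `logger.warning` and `click.secho` cover logs and the terminal. A small sketch of the `warnings` piece (the function name is illustrative):

```python
import warnings

def deprecated_entry_point() -> None:
    # stacklevel=2 attributes the warning to the caller, not to this line
    warnings.warn(
        "this entry point is deprecated",
        DeprecationWarning,
        stacklevel=2,
    )

warnings.simplefilter("always", DeprecationWarning)  # DeprecationWarnings are often filtered out by default
deprecated_entry_point()
```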
datahub/emitter/rest_emitter.py
CHANGED
@@ -95,7 +95,7 @@ TRACE_INITIAL_BACKOFF = 1.0  # Start with 1 second
 TRACE_MAX_BACKOFF = 300.0  # Cap at 5 minutes
 TRACE_BACKOFF_FACTOR = 2.0  # Double the wait time each attempt
 
-# The limit is
+# The limit is 16,000,000 bytes. We will use a max of 15mb to have some space
 # for overhead like request headers.
 # This applies to pretty much all calls to GMS.
 INGEST_MAX_PAYLOAD_BYTES = int(
@@ -586,6 +586,11 @@ class DataHubRestEmitter(Closeable, Emitter):
             "systemMetadata": system_metadata_obj,
         }
         payload = json.dumps(snapshot)
+        if len(payload) > INGEST_MAX_PAYLOAD_BYTES:
+            logger.warning(
+                f"MCE object has size {len(payload)} that exceeds the max payload size of {INGEST_MAX_PAYLOAD_BYTES}, "
+                "so this metadata will likely fail to be emitted."
+            )
 
         self._emit_generic(url, payload)
 
@@ -764,16 +769,24 @@ class DataHubRestEmitter(Closeable, Emitter):
         url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
 
         mcp_objs = [pre_json_transform(mcp.to_obj()) for mcp in mcps]
+        if len(mcp_objs) == 0:
+            return 0
 
         # As a safety mechanism, we need to make sure we don't exceed the max payload size for GMS.
         # If we will exceed the limit, we need to break it up into chunks.
-        mcp_obj_chunks: List[List[str]] = []
-        current_chunk_size =
+        mcp_obj_chunks: List[List[str]] = [[]]
+        current_chunk_size = 0
         for mcp_obj in mcp_objs:
+            mcp_identifier = f"{mcp_obj.get('entityUrn')}-{mcp_obj.get('aspectName')}"
             mcp_obj_size = len(json.dumps(mcp_obj))
             if _DATAHUB_EMITTER_TRACE:
                 logger.debug(
-                    f"Iterating through object with size {mcp_obj_size}
+                    f"Iterating through object ({mcp_identifier}) with size {mcp_obj_size}"
+                )
+            if mcp_obj_size > INGEST_MAX_PAYLOAD_BYTES:
+                logger.warning(
+                    f"MCP object {mcp_identifier} has size {mcp_obj_size} that exceeds the max payload size of {INGEST_MAX_PAYLOAD_BYTES}, "
+                    "so this metadata will likely fail to be emitted."
                 )
 
             if (
@@ -786,7 +799,7 @@ class DataHubRestEmitter(Closeable, Emitter):
                 current_chunk_size = 0
             mcp_obj_chunks[-1].append(mcp_obj)
             current_chunk_size += mcp_obj_size
-        if len(mcp_obj_chunks) >
+        if len(mcp_obj_chunks) > 1 or _DATAHUB_EMITTER_TRACE:
             logger.debug(
                 f"Decided to send {len(mcps)} MCP batch in {len(mcp_obj_chunks)} chunks"
            )
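The batching logic above packs serialized MCPs into payloads that stay under the GMS size cap. A self-contained sketch of the same greedy idea (names and the byte limit are illustrative, not the emitter's actual constants):

```python
import json
from typing import Any, Dict, List

MAX_PAYLOAD_BYTES = 15 * 1024 * 1024

def chunk_by_size(objs: List[Dict[str, Any]], limit: int = MAX_PAYLOAD_BYTES) -> List[List[Dict[str, Any]]]:
    chunks: List[List[Dict[str, Any]]] = [[]]   # start with one (possibly empty) chunk
    current = 0
    for obj in objs:
        size = len(json.dumps(obj))
        if chunks[-1] and current + size > limit:  # open a new chunk before overflowing
            chunks.append([])
            current = 0
        chunks[-1].append(obj)
        current += size
    return chunks
```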
datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py
CHANGED

@@ -90,7 +90,7 @@ class EnsureAspectSizeProcessor:
         on GMS side and failure of the entire ingestion. This processor will attempt to trim suspected aspects.
         """
         for wu in stream:
-            logger.debug(f"Ensuring size of workunit: {wu.id}")
+            # logger.debug(f"Ensuring size of workunit: {wu.id}")
 
             if schema := wu.get_aspect_of_type(SchemaMetadataClass):
                 self.ensure_schema_metadata_size(wu.get_urn(), schema)
datahub/ingestion/api/report.py
CHANGED
@@ -186,11 +186,19 @@ class ExamplesReport(Report, Closeable):
     aspects: Dict[str, Dict[str, int]] = field(
         default_factory=lambda: defaultdict(lambda: defaultdict(int))
     )
+    # This counts existence of aspects for each entity/subtype
+    # This is used for the UI to calculate %age of entities with the aspect
     aspects_by_subtypes: Dict[str, Dict[str, Dict[str, int]]] = field(
         default_factory=lambda: defaultdict(
             lambda: defaultdict(lambda: defaultdict(int))
         )
     )
+    # This counts all aspects for each entity/subtype
+    aspects_by_subtypes_full_count: Dict[str, Dict[str, Dict[str, int]]] = field(
+        default_factory=lambda: defaultdict(
+            lambda: defaultdict(lambda: defaultdict(int))
+        )
+    )
     samples: Dict[str, Dict[str, List[str]]] = field(
         default_factory=lambda: defaultdict(lambda: defaultdict(list))
     )
@@ -399,6 +407,9 @@ class ExamplesReport(Report, Closeable):
         entity_subtype_aspect_counts: Dict[str, Dict[str, Dict[str, int]]] = (
             defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
         )
+        entity_subtype_aspect_counts_exist: Dict[str, Dict[str, Dict[str, int]]] = (
+            defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
+        )
         for row in self._file_based_dict.sql_query(query):
             entity_type = row["entityType"]
             sub_type = row["subTypes"]
@@ -410,15 +421,23 @@ class ExamplesReport(Report, Closeable):
                 entity_subtype_aspect_counts[entity_type][sub_type][aspect] += (
                     aspect_count * count
                 )
+                entity_subtype_aspect_counts_exist[entity_type][sub_type][aspect] += (
+                    count
+                )
 
         self.aspects.clear()
         self.aspects_by_subtypes.clear()
-
+        self.aspects_by_subtypes_full_count.clear()
         for entity_type, subtype_counts in entity_subtype_aspect_counts.items():
             for sub_type, aspect_counts in subtype_counts.items():
                 for aspect, count in aspect_counts.items():
                     self.aspects[entity_type][aspect] += count
-
+                self.aspects_by_subtypes_full_count[entity_type][sub_type] = dict(
+                    aspect_counts
+                )
+
+        for entity_type, subtype_counts in entity_subtype_aspect_counts_exist.items():
+            for sub_type, aspect_counts in subtype_counts.items():
                 self.aspects_by_subtypes[entity_type][sub_type] = dict(aspect_counts)
 
         self.samples.clear()
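The report keeps these tallies in three-level nested `defaultdict` counters (entity type, then subtype, then aspect), later frozen into plain dicts. A minimal sketch of that pattern (the keys below are illustrative):

```python
from collections import defaultdict
from typing import Dict

counts: Dict[str, Dict[str, Dict[str, int]]] = defaultdict(
    lambda: defaultdict(lambda: defaultdict(int))
)
counts["dataset"]["Table"]["schemaMetadata"] += 3
counts["dataset"]["Table"]["status"] += 1

# Freeze the nested counters into ordinary dicts for reporting/serialization.
frozen = {e: {s: dict(a) for s, a in subs.items()} for e, subs in counts.items()}
print(frozen)  # {'dataset': {'Table': {'schemaMetadata': 3, 'status': 1}}}
```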
datahub/ingestion/api/source.py
CHANGED
@@ -81,11 +81,24 @@ class StructuredLogLevel(Enum):
     ERROR = logging.ERROR
 
 
+class StructuredLogCategory(Enum):
+    """
+    This is used to categorise the errors mainly based on the biggest impact area
+    This is to be used to help in self-serve understand the impact of any log entry
+    More enums to be added as logs are updated to be self-serve
+    """
+
+    LINEAGE = "LINEAGE"
+    USAGE = "USAGE"
+    PROFILING = "PROFILING"
+
+
 @dataclass
 class StructuredLogEntry(Report):
     title: Optional[str]
     message: str
     context: LossyList[str]
+    log_category: Optional[StructuredLogCategory] = None
 
 
 @dataclass
@@ -108,9 +121,10 @@ class StructuredLogs(Report):
         exc: Optional[BaseException] = None,
         log: bool = False,
         stacklevel: int = 1,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
         """
-        Report a user-facing
+        Report a user-facing log for the ingestion run.
 
         Args:
             level: The level of the log entry.
@@ -118,6 +132,9 @@ class StructuredLogs(Report):
             title: The category / heading to present on for this message in the UI.
             context: Additional context (e.g. where, how) for the log entry.
             exc: The exception associated with the event. We'll show the stack trace when in debug mode.
+            log_category: The type of the log entry. This is used to categorise the log entry.
+            log: Whether to log the entry to the console.
+            stacklevel: The stack level to use for the log entry.
         """
 
         # One for this method, and one for the containing report_* call.
@@ -160,6 +177,7 @@ class StructuredLogs(Report):
                 title=title,
                 message=message,
                 context=context_list,
+                log_category=log_category,
             )
         else:
             if context is not None:
@@ -219,9 +237,19 @@ class SourceReport(ExamplesReport):
         context: Optional[str] = None,
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.WARN,
+            StructuredLogLevel.WARN,
+            message,
+            title,
+            context,
+            exc,
+            log=False,
+            log_category=log_category,
         )
 
     def warning(
@@ -231,9 +259,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.WARN,
+            StructuredLogLevel.WARN,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
 
     def report_failure(
@@ -243,9 +281,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.ERROR,
+            StructuredLogLevel.ERROR,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
 
     def failure(
@@ -255,9 +303,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.ERROR,
+            StructuredLogLevel.ERROR,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
 
     def info(
@@ -267,9 +325,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.INFO,
+            StructuredLogLevel.INFO,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
 
     @contextlib.contextmanager
@@ -279,6 +347,7 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         context: Optional[str] = None,
         level: StructuredLogLevel = StructuredLogLevel.ERROR,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> Iterator[None]:
         # Convenience method that helps avoid boilerplate try/except blocks.
         # TODO: I'm not super happy with the naming here - it's not obvious that this
@@ -287,7 +356,12 @@ class SourceReport(ExamplesReport):
             yield
         except Exception as exc:
             self._structured_logs.report_log(
-                level,
+                level,
+                message=message,
+                title=title,
+                context=context,
+                exc=exc,
+                log_category=log_category,
             )
 
     def __post_init__(self) -> None:
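Taken together, these changes let a source tag a structured log with its impact area. A hedged usage sketch (assuming a SourceReport can be constructed standalone, as simple sources do; the message and context are illustrative):

```python
from datahub.ingestion.api.source import SourceReport, StructuredLogCategory

report = SourceReport()
report.warning(
    message="Failed to resolve upstream table for a view",
    context="db.schema.my_view",
    log_category=StructuredLogCategory.LINEAGE,
)
```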
datahub/ingestion/autogenerated/capability_summary.json
CHANGED

@@ -1,9 +1,18 @@
 {
-  "generated_at": "2025-07-
+  "generated_at": "2025-07-31T12:54:30.557618+00:00",
   "generated_by": "metadata-ingestion/scripts/capability_summary.py",
   "plugin_details": {
     "abs": {
       "capabilities": [
+        {
+          "capability": "CONTAINERS",
+          "description": "Extract ABS containers and folders",
+          "subtype_modifier": [
+            "Folder",
+            "ABS container"
+          ],
+          "supported": true
+        },
         {
           "capability": "DATA_PROFILING",
           "description": "Optionally enabled via configuration",
@@ -468,7 +477,9 @@
         {
           "capability": "CONTAINERS",
           "description": "Enabled by default",
-          "subtype_modifier":
+          "subtype_modifier": [
+            "Database"
+          ],
           "supported": true
         },
         {
@@ -531,13 +542,6 @@
       "platform_name": "File Based Lineage",
       "support_status": "CERTIFIED"
     },
-    "datahub-mock-data": {
-      "capabilities": [],
-      "classname": "datahub.ingestion.source.mock_data.datahub_mock_data.DataHubMockDataSource",
-      "platform_id": "datahubmockdata",
-      "platform_name": "DataHubMockData",
-      "support_status": "TESTING"
-    },
     "dbt": {
       "capabilities": [
         {
@@ -607,7 +611,9 @@
         {
           "capability": "CONTAINERS",
           "description": "Enabled by default",
-          "subtype_modifier":
+          "subtype_modifier": [
+            "Folder"
+          ],
           "supported": true
         },
         {
@@ -643,6 +649,14 @@
           "subtype_modifier": null,
           "supported": true
         },
+        {
+          "capability": "LINEAGE_FINE",
+          "description": "Extract column-level lineage",
+          "subtype_modifier": [
+            "Table"
+          ],
+          "supported": true
+        },
         {
           "capability": "DATA_PROFILING",
           "description": "Optionally enabled via configuration",
@@ -688,7 +702,9 @@
         {
           "capability": "LINEAGE_COARSE",
           "description": "Enabled by default",
-          "subtype_modifier":
+          "subtype_modifier": [
+            "Table"
+          ],
           "supported": true
         }
       ],
@@ -1229,8 +1245,7 @@
           "capability": "CONTAINERS",
           "description": "Enabled by default",
           "subtype_modifier": [
-            "
-            "Schema"
+            "Catalog"
           ],
           "supported": true
         },
@@ -2387,8 +2402,9 @@
         },
         {
           "capability": "LINEAGE_COARSE",
-          "description": "
+          "description": "Extract table-level lineage",
           "subtype_modifier": [
+            "Table",
             "View"
           ],
           "supported": true
@@ -2411,8 +2427,7 @@
           "capability": "CONTAINERS",
           "description": "Enabled by default",
           "subtype_modifier": [
-            "
-            "Schema"
+            "Catalog"
           ],
           "supported": true
         },
@@ -2598,7 +2613,8 @@
           "capability": "CONTAINERS",
           "description": "Enabled by default",
           "subtype_modifier": [
-            "Database"
+            "Database",
+            "Schema"
           ],
           "supported": true
         },
@@ -2812,6 +2828,15 @@
           "description": "Enabled by default",
           "subtype_modifier": null,
           "supported": true
+        },
+        {
+          "capability": "LINEAGE_COARSE",
+          "description": "Extract table-level lineage for Salesforce objects",
+          "subtype_modifier": [
+            "Custom Object",
+            "Object"
+          ],
+          "supported": true
         }
       ],
       "classname": "datahub.ingestion.source.salesforce.SalesforceSource",
@@ -3207,7 +3232,9 @@
         {
           "capability": "CONTAINERS",
           "description": "Enabled by default",
-          "subtype_modifier":
+          "subtype_modifier": [
+            "Database"
+          ],
           "supported": true
         },
         {
@@ -3339,8 +3366,9 @@
         },
         {
           "capability": "LINEAGE_COARSE",
-          "description": "
+          "description": "Extract table-level lineage",
           "subtype_modifier": [
+            "Table",
             "View"
           ],
           "supported": true
datahub/ingestion/graph/client.py
CHANGED

@@ -76,7 +76,15 @@ from datahub.metadata.schema_classes import (
     SystemMetadataClass,
     TelemetryClientIdClass,
 )
-from datahub.metadata.urns import
+from datahub.metadata.urns import (
+    CorpUserUrn,
+    MlFeatureTableUrn,
+    MlFeatureUrn,
+    MlModelGroupUrn,
+    MlModelUrn,
+    MlPrimaryKeyUrn,
+    Urn,
+)
 from datahub.telemetry.telemetry import telemetry_instance
 from datahub.utilities.perf_timer import PerfTimer
 from datahub.utilities.str_enum import StrEnum
@@ -118,8 +126,16 @@ def entity_type_to_graphql(entity_type: str) -> str:
     """Convert the entity types into GraphQL "EntityType" enum values."""
 
     # Hard-coded special cases.
-
-
+    special_cases = {
+        CorpUserUrn.ENTITY_TYPE: "CORP_USER",
+        MlModelUrn.ENTITY_TYPE: "MLMODEL",
+        MlModelGroupUrn.ENTITY_TYPE: "MLMODEL_GROUP",
+        MlFeatureTableUrn.ENTITY_TYPE: "MLFEATURE_TABLE",
+        MlFeatureUrn.ENTITY_TYPE: "MLFEATURE",
+        MlPrimaryKeyUrn.ENTITY_TYPE: "MLPRIMARY_KEY",
+    }
+    if entity_type in special_cases:
+        return special_cases[entity_type]
 
     # Convert camelCase to UPPER_UNDERSCORE.
     entity_type = (
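The helper above tries an explicit lookup first and only then falls back to a camelCase-to-UPPER_UNDERSCORE conversion. A rough standalone sketch of that approach (the regex and sample mappings are illustrative, not the package's exact code):

```python
import re

SPECIAL = {"corpuser": "CORP_USER", "mlModel": "MLMODEL"}

def to_graphql_enum(entity_type: str) -> str:
    if entity_type in SPECIAL:
        return SPECIAL[entity_type]
    # insert an underscore at each lower->upper boundary, then uppercase
    return re.sub(r"(?<=[a-z])(?=[A-Z])", "_", entity_type).upper()

print(to_graphql_enum("dataFlow"))  # DATA_FLOW
print(to_graphql_enum("corpuser"))  # CORP_USER
```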
datahub/ingestion/sink/datahub_rest.py
CHANGED

@@ -92,6 +92,7 @@ class DatahubRestSinkConfig(DatahubClientConfig):
 @dataclasses.dataclass
 class DataHubRestSinkReport(SinkReport):
     mode: Optional[RestSinkMode] = None
+    endpoint: Optional[RestSinkEndpoint] = None
     max_threads: Optional[int] = None
     gms_version: Optional[str] = None
     pending_requests: int = 0
@@ -142,6 +143,7 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
 
         self.report.gms_version = gms_config.service_version
         self.report.mode = self.config.mode
+        self.report.endpoint = self.config.endpoint
         self.report.max_threads = self.config.max_threads
         logger.debug("Setting env variables to override config")
         logger.debug("Setting gms config")
datahub/ingestion/source/abs/config.py
CHANGED

@@ -151,7 +151,7 @@ class DataLakeSourceConfig(
             raise ValueError("platform must not be empty")
         return platform
 
-    @pydantic.root_validator()
+    @pydantic.root_validator(skip_on_failure=True)
     def ensure_profiling_pattern_is_passed_to_profiling(
         cls, values: Dict[str, Any]
     ) -> Dict[str, Any]:
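Note on `skip_on_failure=True`: with pydantic's v1-style API (including the compatibility shim in pydantic 2), a post-validation `root_validator` must opt in to being skipped when field validation has already failed; otherwise newer pydantic versions reject the bare decorator. A hedged sketch of the idea (the model and fields are illustrative):

```python
from pydantic import BaseModel, root_validator  # v1-style API


class ProfilingSettings(BaseModel):
    profiling_enabled: bool = False
    profile_patterns: list = []

    @root_validator(skip_on_failure=True)
    def check_profiling(cls, values):
        # Runs only when all field validators passed, so `values` is fully populated.
        if values["profiling_enabled"] and not values["profile_patterns"]:
            raise ValueError("profiling requires at least one pattern")
        return values
```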
|