PyPI - acryl-datahub - Versions diffs - 1.0.0.3rc12__py3-none-any.whl → 1.0.0.4rc1__py3-none-any.whl - Mend

acryl-datahub 1.0.0.3rc12__py3-none-any.whl → 1.0.0.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic. Click here for more details.

Files changed (36)

{acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/METADATA +2509 -2512
{acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/RECORD +36 -33
{acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/WHEEL +1 -1
datahub/_version.py +1 -1
datahub/emitter/request_helper.py +10 -5
datahub/emitter/rest_emitter.py +183 -106
datahub/ingestion/extractor/schema_util.py +17 -1
datahub/ingestion/graph/client.py +17 -4
datahub/ingestion/graph/links.py +53 -0
datahub/ingestion/sink/datahub_rest.py +11 -10
datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
datahub/ingestion/source/fivetran/config.py +1 -1
datahub/ingestion/source/ge_data_profiler.py +25 -0
datahub/ingestion/source/snowflake/snowflake_config.py +1 -12
datahub/ingestion/source/snowflake/snowflake_connection.py +5 -17
datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
datahub/ingestion/source/sql/athena.py +2 -1
datahub/ingestion/source/sql/hive_metastore.py +1 -1
datahub/ingestion/source/sql/mssql/source.py +1 -1
datahub/ingestion/source/sql/sql_config.py +1 -34
datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
datahub/ingestion/source/unity/config.py +2 -1
datahub/metadata/_internal_schema_classes.py +503 -490
datahub/metadata/_urns/urn_defs.py +1528 -1528
datahub/metadata/schema.avsc +15431 -15414
datahub/metadata/schemas/Operation.avsc +17 -0
datahub/sdk/main_client.py +15 -0
datahub/sql_parsing/_sqlglot_patch.py +1 -2
datahub/sql_parsing/sql_parsing_aggregator.py +3 -2
datahub/utilities/server_config_util.py +14 -75
{acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/entry_points.txt +0 -0
{acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/licenses/LICENSE +0 -0
{acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/top_level.txt +0 -0

datahub/emitter/rest_emitter.py CHANGED Viewed

@@ -20,6 +20,7 @@ from typing import (
     Sequence,
     Tuple,
     Union,
+    overload,
 )
 import pydantic
@@ -103,9 +104,28 @@ BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
 )
-class RestTraceMode(ConfigEnum):
-    ENABLED = auto()
-    DISABLED = auto()
+class EmitMode(ConfigEnum):
+    # Fully synchronous processing that updates both primary storage (SQL) and search storage (Elasticsearch) before returning.
+    # Provides the strongest consistency guarantee but with the highest cost. Best for critical operations where immediate
+    # searchability and consistent reads are required.
+    SYNC_WAIT = auto()
+    # Synchronously updates the primary storage (SQL) but asynchronously updates search storage (Elasticsearch). Provides
+    # a balance between consistency and performance. Suitable for updates that need to be immediately reflected in direct
+    # entity retrievals but where search index consistency can be slightly delayed.
+    SYNC_PRIMARY = auto()
+    # Queues the metadata change for asynchronous processing and returns immediately. The client continues execution without
+    # waiting for the change to be fully processed. Best for high-throughput scenarios where eventual consistency is acceptable.
+    ASYNC = auto()
+    # Queues the metadata change asynchronously but blocks until confirmation that the write has been fully persisted.
+    # More efficient than fully synchronous operations due to backend parallelization and batching while still providing
+    # strong consistency guarantees. Useful when you need confirmation of successful persistence without sacrificing performance.
+    ASYNC_WAIT = auto()
+_DEFAULT_EMIT_MODE = pydantic.parse_obj_as(
+    EmitMode,
+    os.getenv("DATAHUB_EMIT_MODE", EmitMode.SYNC_PRIMARY),
+)
 class RestSinkEndpoint(ConfigEnum):
@@ -119,13 +139,6 @@ DEFAULT_REST_EMITTER_ENDPOINT = pydantic.parse_obj_as(
 )
-# Supported with v1.0
-DEFAULT_REST_TRACE_MODE = pydantic.parse_obj_as(
-    RestTraceMode,
-    os.getenv("DATAHUB_REST_TRACE_MODE", RestTraceMode.DISABLED),
-)
 class RequestsSessionConfig(ConfigModel):
     timeout: Union[float, Tuple[float, float], None] = _DEFAULT_TIMEOUT_SEC
@@ -282,8 +295,7 @@ class DataHubRestEmitter(Closeable, Emitter):
     _token: Optional[str]
     _session: requests.Session
     _openapi_ingestion: Optional[bool]
-    _default_trace_mode: bool
-    server_config: RestServiceConfig
+    _server_config: RestServiceConfig
     def __init__(
         self,
@@ -300,7 +312,6 @@ class DataHubRestEmitter(Closeable, Emitter):
         client_certificate_path: Optional[str] = None,
         disable_ssl_verification: bool = False,
         openapi_ingestion: Optional[bool] = None,
-        default_trace_mode: bool = False,
         client_mode: Optional[ClientMode] = None,
         datahub_component: Optional[str] = None,
     ):
@@ -314,15 +325,11 @@ class DataHubRestEmitter(Closeable, Emitter):
         self._gms_server = fixup_gms_url(gms_server)
         self._token = token
-        self._default_trace_mode = default_trace_mode
         self._session = requests.Session()
         self._openapi_ingestion = (
             openapi_ingestion  # Re-evaluated after test connection
         )
-        if self._default_trace_mode:
-            logger.debug("Using API Tracing for ingestion.")
         headers = {
             "X-RestLi-Protocol-Version": "2.0.0",
             "Content-Type": "application/json",
@@ -376,50 +383,88 @@ class DataHubRestEmitter(Closeable, Emitter):
         self._session = self._session_config.build_session()
-    def test_connection(self) -> None:
-        url = f"{self._gms_server}/config"
-        try:
-            # Create a config instance with session and URL
-            config = RestServiceConfig(session=self._session, url=url)
-            # Attempt to load config, which will throw ConfigurationError if there's an issue
-            config.fetch_config()
-            self.server_config = config
-            # Determine OpenAPI mode
-            if self._openapi_ingestion is None:
-                # No constructor parameter
-                if (
-                    not os.getenv("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT")
-                    and self._session_config.client_mode == ClientMode.SDK
-                    and self.server_config.supports_feature(ServiceFeature.OPEN_API_SDK)
-                ):
-                    # Enable if SDK client and no environment variable specified
-                    self._openapi_ingestion = True
-                else:
-                    # The system env is specifying the value
-                    self._openapi_ingestion = (
-                        DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI
+    @property
+    def server_config(self) -> RestServiceConfig:
+        return self.fetch_server_config()
+    # TODO: This should move to DataHubGraph once it no longer inherits from DataHubRestEmitter
+    def fetch_server_config(self) -> RestServiceConfig:
+        """
+        Fetch configuration from the server if not already loaded.
+        Returns:
+            The configuration dictionary
+        Raises:
+            ConfigurationError: If there's an error fetching or validating the configuration
+        """
+        if not hasattr(self, "_server_config") or not self._server_config:
+            if self._session is None or self._gms_server is None:
+                raise ConfigurationError(
+                    "Session and URL are required to load configuration"
+                )
+            url = f"{self._gms_server}/config"
+            response = self._session.get(url)
+            if response.status_code == 200:
+                raw_config = response.json()
+                # Validate that we're connected to the correct service
+                if not raw_config.get("noCode") == "true":
+                    raise ConfigurationError(
+                        "You seem to have connected to the frontend service instead of the GMS endpoint. "
+                        "The rest emitter should connect to DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms). "
+                        "For Acryl users, the endpoint should be https://<name>.acryl.io/gms"
                     )
-            logger.debug(
-                f"Using {'OpenAPI' if self._openapi_ingestion else 'Restli'} for ingestion."
-            )
+                self._server_config = RestServiceConfig(raw_config=raw_config)
+                self._post_fetch_server_config()
-            # Set default tracing for SDK
+            else:
+                logger.debug(
+                    f"Unable to connect to {url} with status_code: {response.status_code}. Response: {response.text}"
+                )
+                if response.status_code == 401:
+                    message = f"Unable to connect to {url} - got an authentication error: {response.text}."
+                else:
+                    message = f"Unable to connect to {url} with status_code: {response.status_code}."
+                message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)."
+                raise ConfigurationError(message)
+        return self._server_config
+    def _post_fetch_server_config(self) -> None:
+        # Determine OpenAPI mode
+        if self._openapi_ingestion is None:
+            # No constructor parameter
             if (
-                self._session_config.client_mode == ClientMode.SDK
-                and self.server_config.supports_feature(ServiceFeature.API_TRACING)
+                not os.getenv("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT")
+                and self._session_config.client_mode == ClientMode.SDK
+                and self._server_config.supports_feature(ServiceFeature.OPEN_API_SDK)
             ):
-                # Enable tracing if using SDK & server supported
-                self._default_trace_mode = True
+                # Enable if SDK client and no environment variable specified
+                self._openapi_ingestion = True
+            else:
+                # The system env is specifying the value
+                self._openapi_ingestion = (
+                    DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI
+                )
+        logger.debug(
+            f"Using {'OpenAPI' if self._openapi_ingestion else 'Restli'} for ingestion."
+        )
+        logger.debug(
+            f"{EmitMode.ASYNC_WAIT} {'IS' if self._should_trace(emit_mode=EmitMode.ASYNC_WAIT, warn=False) else 'IS NOT'} supported."
+        )
-        except ConfigurationError as e:
-            # Just re-raise the exception
-            raise e
+    def test_connection(self) -> None:
+        self.fetch_server_config()
-    def get_server_config(self) -> RestServiceConfig:
-        self.test_connection()
-        return self.server_config
+    def get_server_config(self) -> dict:
+        return self.server_config.raw_config
     def to_graph(self) -> "DataHubGraph":
         from datahub.ingestion.graph.client import DataHubGraph
@@ -429,16 +474,14 @@ class DataHubRestEmitter(Closeable, Emitter):
     def _to_openapi_request(
         self,
         mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
-        async_flag: Optional[bool] = None,
-        async_default: bool = False,
+        emit_mode: EmitMode,
     ) -> Optional[OpenApiRequest]:
         """
         Convert a MetadataChangeProposal to an OpenAPI request format.
         Args:
             mcp: The metadata change proposal
-            async_flag: Optional flag to override async behavior
-            async_default: Default async behavior if not specified
+            emit_mode: Client emit mode
         Returns:
             An OpenApiRequest object or None if the MCP doesn't have required fields
@@ -446,8 +489,8 @@ class DataHubRestEmitter(Closeable, Emitter):
         return OpenApiRequest.from_mcp(
             mcp=mcp,
             gms_server=self._gms_server,
-            async_flag=async_flag,
-            async_default=async_default,
+            async_flag=emit_mode in (EmitMode.ASYNC, EmitMode.ASYNC_WAIT),
+            search_sync_flag=emit_mode == EmitMode.SYNC_WAIT,
         )
     def emit(
@@ -459,7 +502,7 @@ class DataHubRestEmitter(Closeable, Emitter):
             UsageAggregation,
         ],
         callback: Optional[Callable[[Exception, str], None]] = None,
-        async_flag: Optional[bool] = None,
+        emit_mode: EmitMode = _DEFAULT_EMIT_MODE,
     ) -> None:
         try:
             if isinstance(item, UsageAggregation):
@@ -467,7 +510,7 @@ class DataHubRestEmitter(Closeable, Emitter):
             elif isinstance(
                 item, (MetadataChangeProposal, MetadataChangeProposalWrapper)
             ):
-                self.emit_mcp(item, async_flag=async_flag)
+                self.emit_mcp(item, emit_mode=emit_mode)
             else:
                 self.emit_mce(item)
         except Exception as e:
@@ -498,41 +541,64 @@ class DataHubRestEmitter(Closeable, Emitter):
         self._emit_generic(url, payload)
+    @overload
+    @deprecated("Use emit_mode instead of async_flag")
     def emit_mcp(
         self,
         mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
+        *,
         async_flag: Optional[bool] = None,
-        trace_flag: Optional[bool] = None,
-        trace_timeout: Optional[timedelta] = timedelta(seconds=3600),
+    ) -> None: ...
+    @overload
+    def emit_mcp(
+        self,
+        mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
+        *,
+        emit_mode: EmitMode = _DEFAULT_EMIT_MODE,
+        wait_timeout: Optional[timedelta] = timedelta(seconds=3600),
+    ) -> None: ...
+    def emit_mcp(
+        self,
+        mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
+        async_flag: Optional[bool] = None,
+        emit_mode: EmitMode = _DEFAULT_EMIT_MODE,
+        wait_timeout: Optional[timedelta] = timedelta(seconds=3600),
     ) -> None:
+        if async_flag is True:
+            emit_mode = EmitMode.ASYNC
         ensure_has_system_metadata(mcp)
         trace_data = None
         if self._openapi_ingestion:
-            request = self._to_openapi_request(mcp, async_flag, async_default=False)
+            request = self._to_openapi_request(mcp, emit_mode)
             if request:
                 response = self._emit_generic(
                     request.url, payload=request.payload, method=request.method
                 )
-                if self._should_trace(async_flag, trace_flag):
+                if self._should_trace(emit_mode):
                     trace_data = extract_trace_data(response) if response else None
         else:
             url = f"{self._gms_server}/aspects?action=ingestProposal"
             mcp_obj = pre_json_transform(mcp.to_obj())
-            payload_dict = {"proposal": mcp_obj}
-            if async_flag is not None:
-                payload_dict["async"] = "true" if async_flag else "false"
+            payload_dict = {
+                "proposal": mcp_obj,
+                "async": "true"
+                if emit_mode in (EmitMode.ASYNC, EmitMode.ASYNC_WAIT)
+                else "false",
+            }
             payload = json.dumps(payload_dict)
             response = self._emit_generic(url, payload)
-            if self._should_trace(async_flag, trace_flag):
+            if self._should_trace(emit_mode):
                 trace_data = (
                     extract_trace_data_from_mcps(response, [mcp]) if response else None
                 )
@@ -540,15 +606,14 @@ class DataHubRestEmitter(Closeable, Emitter):
         if trace_data:
             self._await_status(
                 [trace_data],
-                trace_timeout,
+                wait_timeout,
             )
     def emit_mcps(
         self,
         mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
-        async_flag: Optional[bool] = None,
-        trace_flag: Optional[bool] = None,
-        trace_timeout: Optional[timedelta] = timedelta(seconds=3600),
+        emit_mode: EmitMode = _DEFAULT_EMIT_MODE,
+        wait_timeout: Optional[timedelta] = timedelta(seconds=3600),
     ) -> int:
         if _DATAHUB_EMITTER_TRACE:
             logger.debug(f"Attempting to emit MCP batch of size {len(mcps)}")
@@ -557,16 +622,15 @@ class DataHubRestEmitter(Closeable, Emitter):
             ensure_has_system_metadata(mcp)
         if self._openapi_ingestion:
-            return self._emit_openapi_mcps(mcps, async_flag, trace_flag, trace_timeout)
+            return self._emit_openapi_mcps(mcps, emit_mode, wait_timeout)
         else:
-            return self._emit_restli_mcps(mcps, async_flag)
+            return self._emit_restli_mcps(mcps, emit_mode)
     def _emit_openapi_mcps(
         self,
         mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
-        async_flag: Optional[bool] = None,
-        trace_flag: Optional[bool] = None,
-        trace_timeout: Optional[timedelta] = timedelta(seconds=3600),
+        emit_mode: EmitMode,
+        wait_timeout: Optional[timedelta] = timedelta(seconds=3600),
     ) -> int:
         """
         1. Grouping MCPs by their HTTP method and entity URL and HTTP method
@@ -580,9 +644,8 @@ class DataHubRestEmitter(Closeable, Emitter):
         The joining logic is efficient with a simple string concatenation
         :param mcps: metadata change proposals to transmit
-        :param async_flag: the mode
-        :param trace_flag: whether to trace the requests
-        :param trace_timeout: timeout for tracing
+        :param emit_mode: the mode to emit the MCPs
+        :param wait_timeout: timeout for blocking queue
         :return: number of requests
         """
         # Group by entity URL and HTTP method
@@ -591,7 +654,7 @@ class DataHubRestEmitter(Closeable, Emitter):
         )  # Initialize with one empty Chunk
         for mcp in mcps:
-            request = self._to_openapi_request(mcp, async_flag, async_default=True)
+            request = self._to_openapi_request(mcp, emit_mode)
             if request:
                 # Create a composite key with both method and URL
                 key = (request.method, request.url)
@@ -621,7 +684,7 @@ class DataHubRestEmitter(Closeable, Emitter):
                 )
                 responses.append(response)
-        if self._should_trace(async_flag, trace_flag, async_default=True):
+        if self._should_trace(emit_mode):
             trace_data = []
             for response in responses:
                 data = extract_trace_data(response) if response else None
@@ -629,14 +692,14 @@ class DataHubRestEmitter(Closeable, Emitter):
                     trace_data.append(data)
             if trace_data:
-                self._await_status(trace_data, trace_timeout)
+                self._await_status(trace_data, wait_timeout)
         return len(responses)
     def _emit_restli_mcps(
         self,
         mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
-        async_flag: Optional[bool] = None,
+        emit_mode: EmitMode,
     ) -> int:
         url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
@@ -671,9 +734,12 @@ class DataHubRestEmitter(Closeable, Emitter):
         for mcp_obj_chunk in mcp_obj_chunks:
             # TODO: We're calling json.dumps on each MCP object twice, once to estimate
             # the size when chunking, and again for the actual request.
-            payload_dict: dict = {"proposals": mcp_obj_chunk}
-            if async_flag is not None:
-                payload_dict["async"] = "true" if async_flag else "false"
+            payload_dict: dict = {
+                "proposals": mcp_obj_chunk,
+                "async": "true"
+                if emit_mode in (EmitMode.ASYNC, EmitMode.ASYNC_WAIT)
+                else "false",
+            }
             payload = json.dumps(payload_dict)
             self._emit_generic(url, payload)
@@ -747,7 +813,7 @@ class DataHubRestEmitter(Closeable, Emitter):
     def _await_status(
         self,
         trace_data: List[TraceData],
-        trace_timeout: Optional[timedelta] = timedelta(seconds=3600),
+        wait_timeout: Optional[timedelta] = timedelta(seconds=3600),
     ) -> None:
         """Verify the status of asynchronous write operations.
         Args:
@@ -757,8 +823,8 @@ class DataHubRestEmitter(Closeable, Emitter):
             TraceTimeoutError: If verification fails or times out
             TraceValidationError: Expected write was not completed successfully
         """
-        if trace_timeout is None:
-            raise ValueError("trace_timeout cannot be None")
+        if wait_timeout is None:
+            raise ValueError("wait_timeout cannot be None")
         try:
             if not trace_data:
@@ -771,9 +837,9 @@ class DataHubRestEmitter(Closeable, Emitter):
                 current_backoff = TRACE_INITIAL_BACKOFF
                 while trace.data:
-                    if datetime.now() - start_time > trace_timeout:
+                    if datetime.now() - start_time > wait_timeout:
                         raise TraceTimeoutError(
-                            f"Timeout waiting for async write completion after {trace_timeout.total_seconds()} seconds"
+                            f"Timeout waiting for async write completion after {wait_timeout.total_seconds()} seconds"
                         )
                     base_url = f"{self._gms_server}/openapi/v1/trace/write"
@@ -825,17 +891,28 @@ class DataHubRestEmitter(Closeable, Emitter):
             logger.error(f"Error during status verification: {str(e)}")
             raise
-    def _should_trace(
-        self,
-        async_flag: Optional[bool] = None,
-        trace_flag: Optional[bool] = None,
-        async_default: bool = False,
-    ) -> bool:
-        resolved_trace_flag = (
-            trace_flag if trace_flag is not None else self._default_trace_mode
-        )
-        resolved_async_flag = async_flag if async_flag is not None else async_default
-        return resolved_trace_flag and resolved_async_flag
+    def _should_trace(self, emit_mode: EmitMode, warn: bool = True) -> bool:
+        if emit_mode == EmitMode.ASYNC_WAIT:
+            if not bool(self._openapi_ingestion):
+                if warn:
+                    logger.warning(
+                        f"{emit_mode} requested but is only available when using OpenAPI."
+                    )
+                return False
+            elif getattr(
+                self, "server_config", None
+            ) is None or not self.server_config.supports_feature(
+                ServiceFeature.API_TRACING
+            ):
+                if warn:
+                    logger.warning(
+                        f"{emit_mode} requested but is only available with a newer GMS version."
+                    )
+                return False
+            else:
+                return True
+        else:
+            return False
     def __repr__(self) -> str:
         token_str = (

datahub/ingestion/extractor/schema_util.py CHANGED Viewed

@@ -290,6 +290,12 @@ class AvroToMceSchemaConverter:
           This way we can use the type/description of the non-null type if needed.
         """
+        # props to skip when building jsonProps
+        json_props_to_skip = [
+            "_nullable",
+            "native_data_type",
+        ]
         def __init__(
             self,
             schema: SchemaOrField,
@@ -407,6 +413,16 @@ class AvroToMceSchemaConverter:
                     or self._actual_schema.props.get("logicalType"),
                 )
+                json_props: Optional[Dict[str, Any]] = (
+                    {
+                        k: v
+                        for k, v in merged_props.items()
+                        if k not in self.json_props_to_skip
+                    }
+                    if merged_props
+                    else None
+                )
                 field = SchemaField(
                     fieldPath=field_path,
                     # Populate it with the simple native type for now.
@@ -421,7 +437,7 @@ class AvroToMceSchemaConverter:
                     isPartOfKey=self._converter._is_key_schema,
                     globalTags=tags_aspect,
                     glossaryTerms=meta_terms_aspect,
-                    jsonProps=json.dumps(merged_props) if merged_props else None,
+                    jsonProps=json.dumps(json_props) if json_props else None,
                 )
                 yield field

datahub/ingestion/graph/client.py CHANGED Viewed

@@ -34,9 +34,7 @@ from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
 from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.rest_emitter import (
-    DEFAULT_REST_TRACE_MODE,
     DatahubRestEmitter,
-    RestTraceMode,
 )
 from datahub.emitter.serialization_helper import post_json_transform
 from datahub.ingestion.graph.config import (
@@ -54,6 +52,7 @@ from datahub.ingestion.graph.filters import (
     RemovedStatusFilter,
     generate_filter,
 )
+from datahub.ingestion.graph.links import make_url_for_urn
 from datahub.ingestion.source.state.checkpoint import Checkpoint
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
     MetadataChangeEvent,
@@ -158,7 +157,6 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
             client_certificate_path=self.config.client_certificate_path,
             disable_ssl_verification=self.config.disable_ssl_verification,
             openapi_ingestion=self.config.openapi_ingestion,
-            default_trace_mode=DEFAULT_REST_TRACE_MODE == RestTraceMode.ENABLED,
             client_mode=config.client_mode,
             datahub_component=config.datahub_component,
         )
@@ -187,6 +185,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
         """Get the public-facing base url of the frontend
         This url can be used to construct links to the frontend. The url will not include a trailing slash.
         Note: Only supported with DataHub Cloud.
         """
@@ -198,6 +197,20 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
             raise ValueError("baseUrl not found in server config")
         return base_url
+    def url_for(self, entity_urn: Union[str, Urn]) -> str:
+        """Get the UI url for an entity.
+        Note: Only supported with DataHub Cloud.
+        Args:
+            entity_urn: The urn of the entity to get the url for.
+        Returns:
+            The public-facing url for the entity.
+        """
+        return make_url_for_urn(self.frontend_base_url, str(entity_urn))
     @classmethod
     def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph":
         session_config = emitter._session_config
@@ -361,7 +374,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
         )
     def get_config(self) -> Dict[str, Any]:
-        return self.get_server_config().config
+        return self.server_config.raw_config
     def get_ownership(self, entity_urn: str) -> Optional[OwnershipClass]:
         return self.get_aspect(entity_urn=entity_urn, aspect_type=OwnershipClass)

datahub/ingestion/graph/links.py ADDED Viewed

@@ -0,0 +1,53 @@
+from typing import Optional
+import datahub.metadata.urns as urns
+from datahub.utilities.urns.urn import guess_entity_type
+_url_prefixes = {
+    # Atypical mappings.
+    urns.DataJobUrn.ENTITY_TYPE: "tasks",
+    urns.DataFlowUrn.ENTITY_TYPE: "pipelines",
+    urns.CorpUserUrn.ENTITY_TYPE: "user",
+    urns.CorpGroupUrn.ENTITY_TYPE: "group",
+    # Normal mappings - matches the entity type.
+    urns.ChartUrn.ENTITY_TYPE: "chart",
+    urns.ContainerUrn.ENTITY_TYPE: "container",
+    urns.DataProductUrn.ENTITY_TYPE: "dataProduct",
+    urns.DatasetUrn.ENTITY_TYPE: "dataset",
+    urns.DashboardUrn.ENTITY_TYPE: "dashboard",
+    urns.DomainUrn.ENTITY_TYPE: "domain",
+    urns.GlossaryNodeUrn.ENTITY_TYPE: "glossaryNode",
+    urns.GlossaryTermUrn.ENTITY_TYPE: "glossaryTerm",
+    urns.TagUrn.ENTITY_TYPE: "tag",
+}
+def make_url_for_urn(
+    frontend_base_url: str,
+    entity_urn: str,
+    *,
+    tab: Optional[str] = None,
+) -> str:
+    """Build the public-facing URL for an entity urn.
+    Args:
+        frontend_url: The public-facing base url of the frontend.
+        entity_urn: The urn of the entity to get the url for.
+        tab: The tab to deep link into. If not provided, the default tab for the entity will be shown.
+    Returns:
+        The public-facing url for the entity.
+    Examples:
+        >>> make_url_for_urn("https://demo.datahub.com", "urn:li:container:b41c14bc5cb3ccfbb0433c8cbdef2992", tab="Contents")
+        'https://demo.datahub.com/container/urn:li:container:b41c14bc5cb3ccfbb0433c8cbdef2992/Contents'
+        >>> make_url_for_urn("https://demo.datahub.com", "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.actuating,PROD)")
+        'https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.actuating,PROD)/'
+    """
+    entity_type = guess_entity_type(entity_urn)
+    url_prefix = _url_prefixes.get(entity_type, entity_type)
+    url = f"{frontend_base_url}/{url_prefix}/{entity_urn}/"
+    if tab:
+        url += f"{tab}"
+    return url

acryl-datahub 1.0.0.3rc12__py3-none-any.whl → 1.0.0.4rc1__py3-none-any.whl

Potentially problematic release.

acryl-datahub 1.0.0.3rc12__py3-none-any.whl → 1.0.0.4rc1__py3-none-any.whl