acryl-datahub 1.3.0.1rc2__py3-none-any.whl → 1.3.0.1rc4__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/METADATA +2469 -2467
- {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/RECORD +50 -48
- datahub/_version.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +26 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/docker_check.py +2 -1
- datahub/cli/docker_cli.py +4 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/quickstart_versioning.py +2 -2
- datahub/cli/specific/dataproduct_cli.py +2 -4
- datahub/cli/specific/user_cli.py +172 -1
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/kafka.py +6 -4
- datahub/emitter/mce_builder.py +2 -4
- datahub/emitter/rest_emitter.py +15 -15
- datahub/entrypoints.py +2 -0
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/source.py +5 -0
- datahub/ingestion/graph/client.py +197 -0
- datahub/ingestion/graph/config.py +2 -2
- datahub/ingestion/sink/datahub_rest.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +20 -13
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -4
- datahub/ingestion/source/grafana/models.py +5 -0
- datahub/ingestion/source/iceberg/iceberg.py +39 -19
- datahub/ingestion/source/kafka_connect/source_connectors.py +4 -1
- datahub/ingestion/source/mode.py +13 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/snowflake/snowflake_schema.py +2 -2
- datahub/ingestion/source/sql/mssql/source.py +7 -1
- datahub/ingestion/source/sql/teradata.py +80 -65
- datahub/ingestion/source/unity/config.py +31 -0
- datahub/ingestion/source/unity/proxy.py +73 -0
- datahub/ingestion/source/unity/source.py +27 -70
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/metadata/_internal_schema_classes.py +544 -544
- datahub/metadata/_urns/urn_defs.py +1728 -1728
- datahub/metadata/schema.avsc +15157 -15157
- datahub/sql_parsing/sql_parsing_aggregator.py +14 -5
- datahub/sql_parsing/sqlglot_lineage.py +7 -0
- datahub/telemetry/telemetry.py +8 -3
- datahub/utilities/file_backed_collections.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/sample_data.py +5 -4
- datahub/emitter/sql_parsing_builder.py +0 -306
- {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/top_level.txt +0 -0
datahub/sql_parsing/sql_parsing_aggregator.py CHANGED

@@ -4,7 +4,6 @@ import enum
 import functools
 import json
 import logging
-import os
 import pathlib
 import tempfile
 import uuid
@@ -14,10 +13,10 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Union, cast

 import datahub.emitter.mce_builder as builder
 import datahub.metadata.schema_classes as models
+from datahub.configuration.env_vars import get_sql_agg_query_log
 from datahub.configuration.time_window_config import get_time_bucket
 from datahub.emitter.mce_builder import get_sys_time, make_ts_millis
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.sql_parsing_builder import compute_upstream_fields
 from datahub.ingestion.api.closeable import Closeable
 from datahub.ingestion.api.report import Report
 from datahub.ingestion.api.workunit import MetadataWorkUnit
@@ -84,7 +83,7 @@ class QueryLogSetting(enum.Enum):
 _DEFAULT_USER_URN = CorpUserUrn("_ingestion")
 _MISSING_SESSION_ID = "__MISSING_SESSION_ID"
 _DEFAULT_QUERY_LOG_SETTING = QueryLogSetting[
-    …
+    get_sql_agg_query_log() or QueryLogSetting.DISABLED.name
 ]
 MAX_UPSTREAM_TABLES_COUNT = 300
 MAX_FINEGRAINEDLINEAGE_COUNT = 2000
@@ -868,7 +867,7 @@ class SqlParsingAggregator(Closeable):
                downstream=parsed.out_tables[0] if parsed.out_tables else None,
                column_lineage=parsed.column_lineage,
                # TODO: We need a full list of columns referenced, not just the out tables.
-                column_usage=…
+                column_usage=self._compute_upstream_fields(parsed),
                inferred_schema=infer_output_schema(parsed),
                confidence_score=parsed.debug_info.confidence,
                extra_info=observed.extra_info,
@@ -1157,7 +1156,7 @@ class SqlParsingAggregator(Closeable):
                actor=None,
                upstreams=parsed.in_tables,
                column_lineage=parsed.column_lineage or [],
-                column_usage=…
+                column_usage=self._compute_upstream_fields(parsed),
                confidence_score=parsed.debug_info.confidence,
            )
        )
@@ -1741,6 +1740,16 @@ class SqlParsingAggregator(Closeable):

        return resolved_query

+    @staticmethod
+    def _compute_upstream_fields(
+        result: SqlParsingResult,
+    ) -> Dict[UrnStr, Set[UrnStr]]:
+        upstream_fields: Dict[UrnStr, Set[UrnStr]] = defaultdict(set)
+        for cl in result.column_lineage or []:
+            for upstream in cl.upstreams:
+                upstream_fields[upstream.table].add(upstream.column)
+        return upstream_fields
+
    def _gen_usage_statistics_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
        if not self._usage_aggregator:
            return

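The new private helper replaces the standalone compute_upstream_fields function from datahub/emitter/sql_parsing_builder.py, which is removed in this release (full deletion shown further below). For orientation, a minimal sketch of the mapping shape it builds; the URNs and column names here are hypothetical, not taken from the package:

from collections import defaultdict
from typing import Dict, Set

# Hypothetical (upstream table URN, upstream column) pairs, standing in for the
# entries found on SqlParsingResult.column_lineage[*].upstreams.
lineage_pairs = [
    ("urn:li:dataset:(urn:li:dataPlatform:snowflake,db.sch.orders,PROD)", "order_id"),
    ("urn:li:dataset:(urn:li:dataPlatform:snowflake,db.sch.orders,PROD)", "amount"),
    ("urn:li:dataset:(urn:li:dataPlatform:snowflake,db.sch.customers,PROD)", "customer_id"),
]

# The same aggregation the new _compute_upstream_fields staticmethod performs:
# upstream table URN -> set of referenced columns, later passed as column_usage.
upstream_fields: Dict[str, Set[str]] = defaultdict(set)
for table_urn, column in lineage_pairs:
    upstream_fields[table_urn].add(column)

print(dict(upstream_fields))
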
datahub/sql_parsing/sqlglot_lineage.py CHANGED

@@ -691,6 +691,13 @@ def _column_level_lineage(
         select_statement=select_statement,
     )

+    # Handle VALUES expressions separately - they have no upstream tables and no column lineage
+    if isinstance(select_statement, sqlglot.exp.Values):
+        return _ColumnLineageWithDebugInfo(
+            column_lineage=[],
+            select_statement=select_statement,
+        )
+
     assert isinstance(select_statement, _SupportedColumnLineageTypesTuple)
     try:
         root_scope = sqlglot.optimizer.build_scope(select_statement)

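For context, a small illustration (not from the package) of the statement shape the new exp.Values guard targets; the table name and rows are hypothetical, and it assumes sqlglot parses INSERT ... VALUES into an exp.Insert whose expression child is an exp.Values node:

import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one("INSERT INTO sales VALUES (1, 'a'), (2, 'b')")
values_body = stmt.expression  # the VALUES node under the INSERT

# A bare VALUES body references no source tables, so _column_level_lineage now
# returns empty column lineage for it instead of failing the isinstance assert.
print(isinstance(values_body, exp.Values))  # expected: True
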
datahub/telemetry/telemetry.py CHANGED

@@ -16,6 +16,11 @@ from datahub._version import __version__, nice_version_name
 from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
 from datahub.cli.env_utils import get_boolean_env_variable
 from datahub.configuration.common import ExceptionWithProps
+from datahub.configuration.env_vars import (
+    get_sentry_dsn,
+    get_sentry_environment,
+    get_telemetry_timeout,
+)
 from datahub.metadata.schema_classes import _custom_package_path
 from datahub.utilities.perf_timer import PerfTimer

@@ -97,11 +102,11 @@ if any(var in os.environ for var in CI_ENV_VARS):
 if _custom_package_path:
     ENV_ENABLED = False

-TIMEOUT = int(…)
+TIMEOUT = int(get_telemetry_timeout())
 MIXPANEL_ENDPOINT = "track.datahubproject.io/mp"
 MIXPANEL_TOKEN = "5ee83d940754d63cacbf7d34daa6f44a"
-SENTRY_DSN: Optional[str] = …
-SENTRY_ENVIRONMENT: str = …
+SENTRY_DSN: Optional[str] = get_sentry_dsn()
+SENTRY_ENVIRONMENT: str = get_sentry_environment()


 def _default_global_properties() -> Dict[str, Any]:

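This hunk and the ones below follow the same pattern: direct os.environ reads are replaced by accessors from the new datahub/configuration/env_vars.py module (+331 lines, listed above but not shown in this diff). A minimal sketch of that accessor style, with the environment variable names and defaults assumed rather than taken from the package:

import os
from typing import Optional


def get_telemetry_timeout() -> str:
    # Assumed variable name and default; the call site wraps this in int().
    return os.environ.get("DATAHUB_TELEMETRY_TIMEOUT", "10")


def get_sentry_dsn() -> Optional[str]:
    # Assumed variable name; None when Sentry reporting is not configured.
    return os.environ.get("SENTRY_DSN") or None


def get_sentry_environment() -> str:
    # Assumed variable name and default.
    return os.environ.get("SENTRY_ENVIRONMENT", "production")
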
datahub/utilities/file_backed_collections.py CHANGED

@@ -1,7 +1,6 @@
 import collections
 import gzip
 import logging
-import os
 import pathlib
 import pickle
 import shutil
@@ -28,6 +27,7 @@ from typing import (
     Union,
 )

+from datahub.configuration.env_vars import get_override_sqlite_version_req
 from datahub.ingestion.api.closeable import Closeable
 from datahub.utilities.sentinels import Unset, unset

@@ -36,7 +36,7 @@ logger: logging.Logger = logging.getLogger(__name__)

 def _get_sqlite_version_override() -> bool:
     """Check if SQLite version requirement should be overridden at runtime."""
-    override_str = …
+    override_str = get_override_sqlite_version_req()
     return bool(override_str and override_str.lower() != "false")


datahub/utilities/logging_manager.py CHANGED

@@ -15,13 +15,13 @@ import collections
 import contextlib
 import itertools
 import logging
-import os
 import pathlib
 import sys
 from typing import Deque, Iterator, Optional

 import click

+from datahub.configuration.env_vars import get_no_color, get_suppress_logging_manager
 from datahub.utilities.tee_io import TeeIO

 BASE_LOGGING_FORMAT = (
@@ -38,7 +38,7 @@ IN_MEMORY_LOG_BUFFER_SIZE = 2000  # lines
 IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH = 2000  # characters


-NO_COLOR = …
+NO_COLOR = get_no_color()


 def extract_name_from_filename(filename: str, fallback_name: str) -> str:
@@ -179,6 +179,18 @@ class _LogBuffer:
         return text


+class _ResilientStreamHandler(logging.StreamHandler):
+    """StreamHandler that gracefully handles closed streams."""
+
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            super().emit(record)
+        except (ValueError, OSError):
+            # Stream was closed (e.g., during pytest teardown)
+            # Silently ignore to prevent test failures
+            pass
+
+
 class _BufferLogHandler(logging.Handler):
     def __init__(self, storage: _LogBuffer) -> None:
         super().__init__()
@@ -201,7 +213,11 @@ class _BufferLogHandler(logging.Handler):
 def _remove_all_handlers(logger: logging.Logger) -> None:
     for handler in logger.handlers[:]:
         logger.removeHandler(handler)
-        …
+        try:
+            handler.close()
+        except (ValueError, OSError):
+            # Handler stream may already be closed (e.g., during pytest teardown)
+            pass


 _log_buffer = _LogBuffer(maxlen=IN_MEMORY_LOG_BUFFER_SIZE)
@@ -219,14 +235,14 @@ _default_formatter = logging.Formatter(BASE_LOGGING_FORMAT)
 def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[None]:
     _log_buffer.clear()

-    if …:
+    if get_suppress_logging_manager() == "1":
         # If we're running in pytest, we don't want to configure logging.
         yield
         return

     with contextlib.ExitStack() as stack:
         # Create stdout handler.
-        stream_handler = …
+        stream_handler = _ResilientStreamHandler()
         stream_handler.addFilter(_DatahubLogFilter(debug=debug))
         stream_handler.setFormatter(_stream_formatter)

@@ -237,7 +253,7 @@ def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[None]:
             tee = TeeIO(sys.stdout, file)
             stack.enter_context(contextlib.redirect_stdout(tee))  # type: ignore

-            file_handler = …
+            file_handler = _ResilientStreamHandler(file)
             file_handler.addFilter(_DatahubLogFilter(debug=True))
             file_handler.setFormatter(_default_formatter)
         else:

datahub/utilities/sample_data.py CHANGED

@@ -1,12 +1,13 @@
-import os
 import pathlib
 import tempfile

 import requests

-…
-…
-…
+from datahub.configuration.env_vars import get_docker_compose_base
+
+DOCKER_COMPOSE_BASE = (
+    get_docker_compose_base()
+    or "https://raw.githubusercontent.com/datahub-project/datahub/master"
 )
 BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
 BOOTSTRAP_MCES_URL = f"{DOCKER_COMPOSE_BASE}/{BOOTSTRAP_MCES_FILE}"

datahub/emitter/sql_parsing_builder.py DELETED

@@ -1,306 +0,0 @@
-import logging
-import time
-from collections import defaultdict
-from dataclasses import dataclass, field
-from datetime import datetime
-from typing import Collection, Dict, Iterable, List, Optional, Set
-
-from datahub.emitter.mce_builder import make_schema_field_urn
-from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.usage.usage_common import BaseUsageConfig, UsageAggregator
-from datahub.metadata.schema_classes import (
-    AuditStampClass,
-    DatasetLineageTypeClass,
-    FineGrainedLineageClass,
-    FineGrainedLineageDownstreamTypeClass,
-    FineGrainedLineageUpstreamTypeClass,
-    OperationClass,
-    OperationTypeClass,
-    UpstreamClass,
-    UpstreamLineageClass,
-)
-from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult
-from datahub.utilities.file_backed_collections import FileBackedDict
-
-logger = logging.getLogger(__name__)
-
-# TODO: Use this over other sources' equivalent code, if possible
-
-DatasetUrn = str
-FieldUrn = str
-UserUrn = str
-
-
-@dataclass
-class LineageEdge:
-    """Stores information about a single lineage edge, from an upstream table to a downstream table."""
-
-    downstream_urn: DatasetUrn
-    upstream_urn: DatasetUrn
-    audit_stamp: Optional[datetime]
-    actor: Optional[UserUrn]
-    type: str = DatasetLineageTypeClass.TRANSFORMED
-
-    # Maps downstream_col -> {upstream_col}
-    column_map: Dict[str, Set[str]] = field(default_factory=lambda: defaultdict(set))
-
-    def gen_upstream_aspect(self) -> UpstreamClass:
-        return UpstreamClass(
-            auditStamp=(
-                AuditStampClass(
-                    time=int(self.audit_stamp.timestamp() * 1000),
-                    actor=self.actor or "",
-                )
-                if self.audit_stamp
-                else None
-            ),
-            dataset=self.upstream_urn,
-            type=self.type,
-        )
-
-    def gen_fine_grained_lineage_aspects(self) -> Iterable[FineGrainedLineageClass]:
-        for downstream_col, upstream_cols in self.column_map.items():
-            yield FineGrainedLineageClass(
-                upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
-                # Sort to avoid creating multiple aspects in backend with same lineage but different order
-                upstreams=sorted(
-                    make_schema_field_urn(self.upstream_urn, col)
-                    for col in upstream_cols
-                ),
-                downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
-                downstreams=[
-                    make_schema_field_urn(self.downstream_urn, downstream_col)
-                ],
-            )
-
-
-@dataclass
-class SqlParsingBuilder:
-    # Open question: does it make sense to iterate over out_tables? When will we have multiple?
-
-    generate_lineage: bool = True
-    generate_usage_statistics: bool = True
-    generate_operations: bool = True
-    usage_config: Optional[BaseUsageConfig] = None
-
-    # Maps downstream urn -> upstream urn -> LineageEdge
-    # Builds up a single LineageEdge for each upstream -> downstream pair
-    _lineage_map: FileBackedDict[Dict[DatasetUrn, LineageEdge]] = field(
-        default_factory=FileBackedDict, init=False
-    )
-
-    # TODO: Replace with FileBackedDict approach like in BigQuery usage
-    _usage_aggregator: UsageAggregator[DatasetUrn] = field(init=False)
-
-    def __post_init__(self) -> None:
-        if self.usage_config:
-            self._usage_aggregator = UsageAggregator(self.usage_config)
-        elif self.generate_usage_statistics:
-            logger.info("No usage config provided, not generating usage statistics")
-            self.generate_usage_statistics = False
-
-    def process_sql_parsing_result(
-        self,
-        result: SqlParsingResult,
-        *,
-        query: str,
-        query_timestamp: Optional[datetime] = None,
-        is_view_ddl: bool = False,
-        user: Optional[UserUrn] = None,
-        custom_operation_type: Optional[str] = None,
-        include_urns: Optional[Set[DatasetUrn]] = None,
-        include_column_lineage: bool = True,
-    ) -> Iterable[MetadataWorkUnit]:
-        """Process a single query and yield any generated workunits.
-
-        Args:
-            result: The result of parsing the query, or a mock result if parsing failed.
-            query: The SQL query to parse and process.
-            query_timestamp: When the query was run.
-            is_view_ddl: Whether the query is a DDL statement that creates a view.
-            user: The urn of the user who ran the query.
-            custom_operation_type: Platform-specific operation type, used if the operation type can't be parsed.
-            include_urns: If provided, only generate workunits for these urns.
-        """
-        downstreams_to_ingest = result.out_tables
-        upstreams_to_ingest = result.in_tables
-        if include_urns:
-            logger.debug(f"Skipping urns {set(downstreams_to_ingest) - include_urns}")
-            downstreams_to_ingest = list(set(downstreams_to_ingest) & include_urns)
-            upstreams_to_ingest = list(set(upstreams_to_ingest) & include_urns)
-
-        if self.generate_lineage:
-            for downstream_urn in downstreams_to_ingest:
-                # Set explicitly so that FileBackedDict registers any mutations
-                self._lineage_map[downstream_urn] = _merge_lineage_data(
-                    downstream_urn=downstream_urn,
-                    upstream_urns=result.in_tables,
-                    column_lineage=(
-                        result.column_lineage if include_column_lineage else None
-                    ),
-                    upstream_edges=self._lineage_map.get(downstream_urn, {}),
-                    query_timestamp=query_timestamp,
-                    is_view_ddl=is_view_ddl,
-                    user=user,
-                )
-
-        if self.generate_usage_statistics and query_timestamp is not None:
-            upstream_fields = compute_upstream_fields(result)
-            for upstream_urn in upstreams_to_ingest:
-                self._usage_aggregator.aggregate_event(
-                    resource=upstream_urn,
-                    start_time=query_timestamp,
-                    query=query,
-                    user=user,
-                    fields=sorted(upstream_fields.get(upstream_urn, [])),
-                )
-
-        if self.generate_operations and query_timestamp is not None:
-            for downstream_urn in downstreams_to_ingest:
-                yield from _gen_operation_workunit(
-                    result,
-                    downstream_urn=downstream_urn,
-                    query_timestamp=query_timestamp,
-                    user=user,
-                    custom_operation_type=custom_operation_type,
-                )
-
-    def add_lineage(
-        self,
-        downstream_urn: DatasetUrn,
-        upstream_urns: Collection[DatasetUrn],
-        timestamp: Optional[datetime] = None,
-        is_view_ddl: bool = False,
-        user: Optional[UserUrn] = None,
-    ) -> None:
-        """Manually add a single upstream -> downstream lineage edge, e.g. if sql parsing fails."""
-        # Set explicitly so that FileBackedDict registers any mutations
-        self._lineage_map[downstream_urn] = _merge_lineage_data(
-            downstream_urn=downstream_urn,
-            upstream_urns=upstream_urns,
-            column_lineage=None,
-            upstream_edges=self._lineage_map.get(downstream_urn, {}),
-            query_timestamp=timestamp,
-            is_view_ddl=is_view_ddl,
-            user=user,
-        )
-
-    def gen_workunits(self) -> Iterable[MetadataWorkUnit]:
-        if self.generate_lineage:
-            for mcp in self._gen_lineage_mcps():
-                yield mcp.as_workunit()
-        if self.generate_usage_statistics:
-            yield from self._gen_usage_statistics_workunits()
-
-    def _gen_lineage_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
-        for downstream_urn in self._lineage_map:
-            upstreams: List[UpstreamClass] = []
-            fine_upstreams: List[FineGrainedLineageClass] = []
-            for edge in self._lineage_map[downstream_urn].values():
-                upstreams.append(edge.gen_upstream_aspect())
-                fine_upstreams.extend(edge.gen_fine_grained_lineage_aspects())
-
-            if not upstreams:
-                continue
-
-            upstream_lineage = UpstreamLineageClass(
-                upstreams=sorted(upstreams, key=lambda x: x.dataset),
-                fineGrainedLineages=sorted(
-                    fine_upstreams,
-                    key=lambda x: (x.downstreams, x.upstreams),
-                )
-                or None,
-            )
-            yield MetadataChangeProposalWrapper(
-                entityUrn=downstream_urn, aspect=upstream_lineage
-            )
-
-    def _gen_usage_statistics_workunits(self) -> Iterable[MetadataWorkUnit]:
-        yield from self._usage_aggregator.generate_workunits(
-            resource_urn_builder=lambda urn: urn, user_urn_builder=lambda urn: urn
-        )
-
-
-def _merge_lineage_data(
-    downstream_urn: DatasetUrn,
-    *,
-    upstream_urns: Collection[DatasetUrn],
-    column_lineage: Optional[List[ColumnLineageInfo]],
-    upstream_edges: Dict[DatasetUrn, LineageEdge],
-    query_timestamp: Optional[datetime],
-    is_view_ddl: bool,
-    user: Optional[UserUrn],
-) -> Dict[str, LineageEdge]:
-    for upstream_urn in upstream_urns:
-        edge = upstream_edges.setdefault(
-            upstream_urn,
-            LineageEdge(
-                downstream_urn=downstream_urn,
-                upstream_urn=upstream_urn,
-                audit_stamp=query_timestamp,
-                actor=user,
-                type=(
-                    DatasetLineageTypeClass.VIEW
-                    if is_view_ddl
-                    else DatasetLineageTypeClass.TRANSFORMED
-                ),
-            ),
-        )
-        if query_timestamp and (  # Use the most recent query
-            edge.audit_stamp is None or query_timestamp > edge.audit_stamp
-        ):
-            edge.audit_stamp = query_timestamp
-            if user:
-                edge.actor = user
-
-    # Note: Inefficient as we loop through all column_lineage entries for each downstream table
-    for cl in column_lineage or []:
-        if cl.downstream.table == downstream_urn:
-            for upstream_column_info in cl.upstreams:
-                if upstream_column_info.table not in upstream_urns:
-                    continue
-                column_map = upstream_edges[upstream_column_info.table].column_map
-                column_map[cl.downstream.column].add(upstream_column_info.column)
-
-    return upstream_edges
-
-
-def compute_upstream_fields(
-    result: SqlParsingResult,
-) -> Dict[DatasetUrn, Set[DatasetUrn]]:
-    upstream_fields: Dict[DatasetUrn, Set[DatasetUrn]] = defaultdict(set)
-    for cl in result.column_lineage or []:
-        for upstream in cl.upstreams:
-            upstream_fields[upstream.table].add(upstream.column)
-    return upstream_fields
-
-
-def _gen_operation_workunit(
-    result: SqlParsingResult,
-    *,
-    downstream_urn: DatasetUrn,
-    query_timestamp: datetime,
-    user: Optional[UserUrn],
-    custom_operation_type: Optional[str],
-) -> Iterable[MetadataWorkUnit]:
-    operation_type = result.query_type.to_operation_type()
-    # Filter out SELECT and other undesired statements
-    if operation_type is None:
-        return
-    elif operation_type == OperationTypeClass.UNKNOWN:
-        if custom_operation_type is None:
-            return
-        else:
-            operation_type = OperationTypeClass.CUSTOM
-
-    aspect = OperationClass(
-        timestampMillis=int(time.time() * 1000),
-        operationType=operation_type,
-        lastUpdatedTimestamp=int(query_timestamp.timestamp() * 1000),
-        actor=user,
-        customOperationType=custom_operation_type,
-    )
-    yield MetadataChangeProposalWrapper(
-        entityUrn=downstream_urn, aspect=aspect
-    ).as_workunit()
