acryl-datahub 1.2.0.2rc2__py3-none-any.whl → 1.2.0.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/METADATA +2620 -2618
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/RECORD +45 -37
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +13 -1
- datahub/ingestion/autogenerated/capability_summary.json +97 -6
- datahub/ingestion/source/aws/glue.py +8 -0
- datahub/ingestion/source/cassandra/cassandra.py +5 -7
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/datahub/datahub_source.py +3 -0
- datahub/ingestion/source/delta_lake/source.py +1 -0
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +120 -0
- datahub/ingestion/source/grafana/report.py +91 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/hex.py +8 -0
- datahub/ingestion/source/looker/looker_source.py +9 -0
- datahub/ingestion/source/looker/lookml_source.py +8 -0
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/redshift/redshift.py +8 -1
- datahub/ingestion/source/s3/source.py +9 -1
- datahub/ingestion/source/sql/athena.py +8 -2
- datahub/ingestion/source/sql/clickhouse.py +9 -0
- datahub/ingestion/source/sql/vertica.py +3 -0
- datahub/ingestion/source/sql_queries.py +88 -46
- datahub/ingestion/source/unity/proxy.py +112 -22
- datahub/ingestion/source/unity/source.py +7 -10
- datahub/metadata/_internal_schema_classes.py +18 -3
- datahub/metadata/schema.avsc +19 -1
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +10 -1
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +9 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/dataset.py +44 -0
- datahub/sdk/search_filters.py +34 -14
- datahub/sql_parsing/sql_parsing_aggregator.py +5 -0
- datahub/telemetry/telemetry.py +4 -1
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql_queries.py CHANGED

@@ -2,12 +2,13 @@ import json
 import logging
 import os
 from dataclasses import dataclass
-from datetime import datetime
+from datetime import datetime
 from functools import partial
-from typing import Iterable, List, Optional, Union
+from typing import ClassVar, Iterable, List, Optional, Union
 
-from pydantic import Field
+from pydantic import BaseModel, Field, validator
 
+from datahub.configuration.datetimes import parse_user_datetime
 from datahub.configuration.source_common import (
     EnvConfigMixin,
     PlatformInstanceConfigMixin,
@@ -35,7 +36,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
 from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
-from datahub.metadata.urns import CorpUserUrn
+from datahub.metadata.urns import CorpUserUrn, DatasetUrn
 from datahub.sql_parsing.schema_resolver import SchemaResolver
 from datahub.sql_parsing.sql_parsing_aggregator import (
     KnownQueryLineageInfo,
@@ -73,9 +74,8 @@ class SqlQueriesSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
         default=None,
     )
     override_dialect: Optional[str] = Field(
-        description="
+        description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.",
         default=None,
-        hidden_from_docs=True,
     )
 
 
@@ -209,19 +209,40 @@ class SqlQueriesSource(Source):
     def _add_query_to_aggregator(self, query_entry: "QueryEntry") -> None:
         """Add a query to the SQL parsing aggregator."""
         try:
-            # If we have
-            if query_entry.upstream_tables
+            # If we have both upstream and downstream tables, use explicit lineage
+            if query_entry.upstream_tables and query_entry.downstream_tables:
                 logger.debug("Using explicit lineage from query file")
                 for downstream_table in query_entry.downstream_tables:
                     known_lineage = KnownQueryLineageInfo(
                         query_text=query_entry.query,
-                        downstream=downstream_table,
-                        upstreams=query_entry.upstream_tables,
+                        downstream=str(downstream_table),
+                        upstreams=[str(urn) for urn in query_entry.upstream_tables],
                         timestamp=query_entry.timestamp,
                         session_id=query_entry.session_id,
                     )
                     self.aggregator.add_known_query_lineage(known_lineage)
             else:
+                # Warn if only partial lineage information is provided
+                # XOR: true if exactly one of upstream_tables or downstream_tables is provided
+                if bool(query_entry.upstream_tables) ^ bool(
+                    query_entry.downstream_tables
+                ):
+                    query_preview = (
+                        query_entry.query[:150] + "..."
+                        if len(query_entry.query) > 150
+                        else query_entry.query
+                    )
+                    missing_upstream = (
+                        "Missing upstream. " if not query_entry.upstream_tables else ""
+                    )
+                    missing_downstream = (
+                        "Missing downstream. "
+                        if not query_entry.downstream_tables
+                        else ""
+                    )
+                    logger.info(
+                        f"Only partial lineage information provided, falling back to SQL parsing for complete lineage detection. {missing_upstream}{missing_downstream}Query: {query_preview}"
+                    )
                 # No explicit lineage, rely on parsing
                 observed_query = ObservedQuery(
                     query=query_entry.query,
@@ -230,6 +251,7 @@ class SqlQueriesSource(Source):
                     session_id=query_entry.session_id,
                     default_db=self.config.default_db,
                     default_schema=self.config.default_schema,
+                    override_dialect=self.config.override_dialect,
                 )
                 self.aggregator.add_observed_query(observed_query)
 
@@ -243,46 +265,66 @@ class SqlQueriesSource(Source):
         )
 
 
-
-class QueryEntry:
+class QueryEntry(BaseModel):
     query: str
-    timestamp: Optional[datetime]
-    user: Optional[CorpUserUrn]
-    operation_type: Optional[str]
-    downstream_tables: List[
-    upstream_tables: List[
+    timestamp: Optional[datetime] = None
+    user: Optional[CorpUserUrn] = None
+    operation_type: Optional[str] = None
+    downstream_tables: List[DatasetUrn] = Field(default_factory=list)
+    upstream_tables: List[DatasetUrn] = Field(default_factory=list)
     session_id: Optional[str] = None
 
+    # Validation context for URN creation
+    _validation_context: ClassVar[Optional[SqlQueriesSourceConfig]] = None
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    @validator("timestamp", pre=True)
+    def parse_timestamp(cls, v):
+        return None if v is None else parse_user_datetime(str(v))
+
+    @validator("user", pre=True)
+    def parse_user(cls, v):
+        if v is None:
+            return None
+
+        return v if isinstance(v, CorpUserUrn) else CorpUserUrn(v)
+
+    @validator("downstream_tables", "upstream_tables", pre=True)
+    def parse_tables(cls, v):
+        if not v:
+            return []
+
+        result = []
+        for item in v:
+            if isinstance(item, DatasetUrn):
+                result.append(item)
+            elif isinstance(item, str):
+                # Skip empty/whitespace-only strings
+                if item and item.strip():
+                    # Convert to URN using validation context
+                    assert cls._validation_context, (
+                        "Validation context must be set for URN creation"
+                    )
+                    urn_string = make_dataset_urn_with_platform_instance(
+                        name=item,
+                        platform=cls._validation_context.platform,
+                        platform_instance=cls._validation_context.platform_instance,
+                        env=cls._validation_context.env,
+                    )
+                    result.append(DatasetUrn.from_string(urn_string))
+
+        return result
+
     @classmethod
     def create(
         cls, entry_dict: dict, *, config: SqlQueriesSourceConfig
     ) -> "QueryEntry":
-
-
-
-
-
-
-
-            user=CorpUserUrn(entry_dict["user"]) if "user" in entry_dict else None,
-            operation_type=entry_dict.get("operation_type"),
-            downstream_tables=[
-                make_dataset_urn_with_platform_instance(
-                    name=table,
-                    platform=config.platform,
-                    platform_instance=config.platform_instance,
-                    env=config.env,
-                )
-                for table in entry_dict.get("downstream_tables", [])
-            ],
-            upstream_tables=[
-                make_dataset_urn_with_platform_instance(
-                    name=table,
-                    platform=config.platform,
-                    platform_instance=config.platform_instance,
-                    env=config.env,
-                )
-                for table in entry_dict.get("upstream_tables", [])
-            ],
-            session_id=entry_dict.get("session_id"),
-        )
+        """Create QueryEntry from dict with config context."""
+        # Set validation context for URN creation
+        cls._validation_context = config
+        try:
+            return cls.parse_obj(entry_dict)
+        finally:
+            cls._validation_context = None
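For context on the refactor above: QueryEntry is now a pydantic model whose validators coerce raw query-file entries into typed URNs, using the class-level _validation_context set by create(). A minimal sketch of that flow under assumed config values (query_file and platform here are illustrative, not taken from this diff):

from datahub.ingestion.source.sql_queries import QueryEntry, SqlQueriesSourceConfig

# Assumed config values for illustration only.
config = SqlQueriesSourceConfig(query_file="queries.json", platform="snowflake")

entry = QueryEntry.create(
    {
        "query": "INSERT INTO db.sch.dst SELECT * FROM db.sch.src",
        "timestamp": "2024-01-01T00:00:00Z",  # coerced via parse_user_datetime
        "user": "jdoe",  # coerced to CorpUserUrn
        "upstream_tables": ["db.sch.src"],  # coerced to DatasetUrn via the config
        "downstream_tables": ["db.sch.dst"],
    },
    config=config,
)
# With both sides present, _add_query_to_aggregator takes the explicit-lineage path.
assert entry.upstream_tables and entry.downstream_tables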
datahub/ingestion/source/unity/proxy.py CHANGED

@@ -4,8 +4,9 @@ Manage the communication with DataBricks Server and provide equivalent dataclass
 
 import dataclasses
 import logging
+from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
-from typing import Any, Dict, Iterable, List, Optional, Union, cast
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Union, cast
 from unittest.mock import patch
 
 import cachetools
@@ -28,6 +29,7 @@ from databricks.sdk.service.sql import (
 )
 from databricks.sdk.service.workspace import ObjectType
 from databricks.sql import connect
+from databricks.sql.types import Row
 
 from datahub._version import nice_version_name
 from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
@@ -291,10 +293,59 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             method, path, body={**body, "page_token": response["next_page_token"]}
         )
 
+    @cached(cachetools.FIFOCache(maxsize=100))
+    def get_catalog_column_lineage(self, catalog: str) -> Dict[str, Dict[str, dict]]:
+        """Get column lineage for all tables in a catalog."""
+        logger.info(f"Fetching column lineage for catalog: {catalog}")
+        try:
+            query = """
+                SELECT
+                    source_table_catalog, source_table_schema, source_table_name, source_column_name, source_type,
+                    target_table_schema, target_table_name, target_column_name,
+                    max(event_time)
+                FROM system.access.column_lineage
+                WHERE
+                    target_table_catalog = %s
+                    AND target_table_schema IS NOT NULL
+                    AND target_table_name IS NOT NULL
+                    AND target_column_name IS NOT NULL
+                    AND source_table_catalog IS NOT NULL
+                    AND source_table_schema IS NOT NULL
+                    AND source_table_name IS NOT NULL
+                    AND source_column_name IS NOT NULL
+                GROUP BY
+                    source_table_catalog, source_table_schema, source_table_name, source_column_name, source_type,
+                    target_table_schema, target_table_name, target_column_name
+            """
+            rows = self._execute_sql_query(query, (catalog,))
+
+            result_dict: Dict[str, Dict[str, dict]] = {}
+            for row in rows:
+                result_dict.setdefault(row["target_table_schema"], {}).setdefault(
+                    row["target_table_name"], {}
+                ).setdefault(row["target_column_name"], []).append(
+                    # make fields look like the response from the older HTTP API
+                    {
+                        "catalog_name": row["source_table_catalog"],
+                        "schema_name": row["source_table_schema"],
+                        "table_name": row["source_table_name"],
+                        "name": row["source_column_name"],
+                    }
+                )
+
+            return result_dict
+        except Exception as e:
+            logger.warning(
+                f"Error getting column lineage for catalog {catalog}: {e}",
+                exc_info=True,
+            )
+            return {}
+
     def list_lineages_by_table(
         self, table_name: str, include_entity_lineage: bool
     ) -> dict:
         """List table lineage by table name."""
+        logger.debug(f"Getting table lineage for {table_name}")
         return self._workspace_client.api_client.do(  # type: ignore
             method="GET",
             path="/api/2.0/lineage-tracking/table-lineage",
@@ -304,13 +355,24 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             },
         )
 
-    def list_lineages_by_column(self, table_name: str, column_name: str) ->
+    def list_lineages_by_column(self, table_name: str, column_name: str) -> list:
         """List column lineage by table name and column name."""
-
-
-
-
-
+        logger.debug(f"Getting column lineage for {table_name}.{column_name}")
+        try:
+            return (
+                self._workspace_client.api_client.do(  # type: ignore
+                    "GET",
+                    "/api/2.0/lineage-tracking/column-lineage",
+                    body={"table_name": table_name, "column_name": column_name},
+                ).get("upstream_cols")
+                or []
+            )
+        except Exception as e:
+            logger.warning(
+                f"Error getting column lineage on table {table_name}, column {column_name}: {e}",
+                exc_info=True,
+            )
+            return []
 
     def table_lineage(self, table: Table, include_entity_lineage: bool) -> None:
         if table.schema.catalog.type == CustomCatalogType.HIVE_METASTORE_CATALOG:
@@ -348,23 +410,51 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
                 f"Error getting lineage on table {table.ref}: {e}", exc_info=True
             )
 
-    def get_column_lineage(
+    def get_column_lineage(
+        self,
+        table: Table,
+        column_names: List[str],
+        *,
+        max_workers: Optional[int] = None,
+    ) -> None:
         try:
-
-
-
-
-
-
-
+            # use the newer system tables if we have a SQL warehouse, otherwise fall back
+            # and use the older (and much slower) HTTP API.
+            if self.warehouse_id:
+                lineage = (
+                    self.get_catalog_column_lineage(table.ref.catalog)
+                    .get(table.ref.schema, {})
+                    .get(table.ref.table, {})
                 )
-
-
-
-
+            else:
+                with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                    futures = [
+                        executor.submit(
+                            self.list_lineages_by_column,
+                            table.ref.qualified_table_name,
+                            column_name,
+                        )
+                        for column_name in column_names
+                    ]
+                    lineage = {
+                        column_name: future.result()
+                        for column_name, future in zip(column_names, futures)
+                    }
+
+            for column_name in column_names:
+                for item in lineage.get(column_name) or []:
+                    table_ref = TableReference.create_from_lineage(
+                        item,
+                        table.schema.catalog.metastore,
+                    )
+                    if table_ref:
+                        table.upstreams.setdefault(table_ref, {}).setdefault(
+                            column_name, []
+                        ).append(item["name"])
+
         except Exception as e:
             logger.warning(
-                f"Error getting column lineage on table {table.ref}
+                f"Error getting column lineage on table {table.ref}: {e}",
                 exc_info=True,
             )
 
@@ -504,14 +594,14 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             executed_as_user_name=info.executed_as_user_name,
         )
 
-    def _execute_sql_query(self, query: str) -> List[
+    def _execute_sql_query(self, query: str, params: Sequence[Any] = ()) -> List[Row]:
         """Execute SQL query using databricks-sql connector for better performance"""
        try:
             with (
                 connect(**self._sql_connection_params) as connection,
                 connection.cursor() as cursor,
             ):
-                cursor.execute(query)
+                cursor.execute(query, list(params))
                 return cursor.fetchall()
 
         except Exception as e:
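The system-table path above caches one nested mapping per catalog, keyed schema → table → target column, with each source entry shaped like the older HTTP API's upstream_cols records. A sketch of that shape and of how get_column_lineage drills into it (all values illustrative):

# Illustrative shape of get_catalog_column_lineage("prod") output.
lineage = {
    "sales": {                # target_table_schema
        "orders_enriched": {  # target_table_name
            "order_id": [     # target_column_name
                {
                    "catalog_name": "prod",
                    "schema_name": "sales",
                    "table_name": "orders",
                    "name": "order_id",
                }
            ]
        }
    }
}

# Mirrors how get_column_lineage drills into the cached per-catalog result.
columns = lineage.get("sales", {}).get("orders_enriched", {})
for target_column, sources in columns.items():
    for src in sources:
        print(
            f"{src['catalog_name']}.{src['schema_name']}."
            f"{src['table_name']}.{src['name']} -> {target_column}"
        )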
datahub/ingestion/source/unity/source.py CHANGED

@@ -1,7 +1,6 @@
 import logging
 import re
 import time
-from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
 from urllib.parse import urljoin
 
@@ -657,15 +656,13 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         if len(table.columns) > self.config.column_lineage_column_limit:
             self.report.num_column_lineage_skipped_column_count += 1
 
-
-
-
-
-
-
-
-                    column.name,
-                )
+        column_names = [
+            column.name
+            for column in table.columns[: self.config.column_lineage_column_limit]
+        ]
+        self.unity_catalog_api_proxy.get_column_lineage(
+            table, column_names, max_workers=self.config.lineage_max_workers
+        )
 
         return self._generate_lineage_aspect(self.gen_dataset_urn(table.ref), table)
 
datahub/metadata/_internal_schema_classes.py CHANGED

@@ -20163,23 +20163,24 @@ class DataHubPageModuleVisibilityClass(DictWrapper):
 
 
 class HierarchyModuleParamsClass(DictWrapper):
-    """The params required if the module is type HIERARCHY_VIEW
-    TODO: add filters
-    relatedEntitiesFilter: optional Filter"""
+    """The params required if the module is type HIERARCHY_VIEW"""
 
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.module.HierarchyModuleParams")
     def __init__(self,
         showRelatedEntities: bool,
         assetUrns: Union[None, List[str]]=None,
+        relatedEntitiesFilterJson: Union[None, str]=None,
     ):
         super().__init__()
 
         self.assetUrns = assetUrns
         self.showRelatedEntities = showRelatedEntities
+        self.relatedEntitiesFilterJson = relatedEntitiesFilterJson
 
     def _restore_defaults(self) -> None:
         self.assetUrns = self.RECORD_SCHEMA.fields_dict["assetUrns"].default
         self.showRelatedEntities = bool()
+        self.relatedEntitiesFilterJson = self.RECORD_SCHEMA.fields_dict["relatedEntitiesFilterJson"].default
 
 
     @property
@@ -20202,6 +20203,20 @@ class HierarchyModuleParamsClass(DictWrapper):
         self._inner_dict['showRelatedEntities'] = value
 
 
+    @property
+    def relatedEntitiesFilterJson(self) -> Union[None, str]:
+        """Optional filters to filter relatedEntities (assetUrns) out
+
+        The stringified json representing the logical predicate built in the UI to select assets.
+        This predicate is turned into orFilters to send through graphql since graphql doesn't support
+        arbitrary nesting. This string is used to restore the UI for this logical predicate."""
+        return self._inner_dict.get('relatedEntitiesFilterJson')  # type: ignore
+
+    @relatedEntitiesFilterJson.setter
+    def relatedEntitiesFilterJson(self, value: Union[None, str]) -> None:
+        self._inner_dict['relatedEntitiesFilterJson'] = value
+
+
 class LinkModuleParamsClass(DictWrapper):
     # No docs available.
 
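A short sketch constructing the regenerated class with its new optional field, assuming it is re-exported from datahub.metadata.schema_classes like the other generated classes; the predicate payload is a hypothetical placeholder:

import json

from datahub.metadata.schema_classes import HierarchyModuleParamsClass

params = HierarchyModuleParamsClass(
    showRelatedEntities=True,
    assetUrns=["urn:li:container:marketing"],
    # Stringified UI predicate; this structure is a made-up placeholder.
    relatedEntitiesFilterJson=json.dumps({"operator": "AND", "operands": []}),
)
assert params.relatedEntitiesFilterJson is not None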
datahub/metadata/schema.avsc CHANGED

@@ -4319,6 +4319,14 @@
       "doc": "The type of upstream entity"
     },
     {
+      "Searchable": {
+        "/*": {
+          "fieldName": "fineGrainedUpstreams",
+          "fieldType": "URN",
+          "hasValuesFieldName": "hasFineGrainedUpstreams",
+          "queryByDefault": false
+        }
+      },
       "Urn": "Urn",
       "urn_is_array": true,
       "type": [
@@ -12875,6 +12883,7 @@
       "Searchable": {
         "fieldName": "upstreams",
         "fieldType": "URN",
+        "hasValuesFieldName": "hasUpstreams",
         "queryByDefault": false
       },
       "java": {
@@ -17844,9 +17853,18 @@
             {
               "type": "boolean",
               "name": "showRelatedEntities"
+            },
+            {
+              "type": [
+                "null",
+                "string"
+              ],
+              "name": "relatedEntitiesFilterJson",
+              "default": null,
+              "doc": "Optional filters to filter relatedEntities (assetUrns) out\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
             }
           ],
-          "doc": "The params required if the module is type HIERARCHY_VIEW
+          "doc": "The params required if the module is type HIERARCHY_VIEW"
         }
       ],
       "name": "hierarchyViewParams",
datahub/metadata/schemas/DataHubPageModuleProperties.avsc CHANGED

@@ -181,9 +181,18 @@
             {
               "type": "boolean",
               "name": "showRelatedEntities"
+            },
+            {
+              "type": [
+                "null",
+                "string"
+              ],
+              "name": "relatedEntitiesFilterJson",
+              "default": null,
+              "doc": "Optional filters to filter relatedEntities (assetUrns) out\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
             }
           ],
-          "doc": "The params required if the module is type HIERARCHY_VIEW
+          "doc": "The params required if the module is type HIERARCHY_VIEW"
         }
       ],
       "name": "hierarchyViewParams",
datahub/metadata/schemas/DataJobInputOutput.avsc CHANGED

@@ -375,6 +375,14 @@
       "doc": "The type of upstream entity"
     },
     {
+      "Searchable": {
+        "/*": {
+          "fieldName": "fineGrainedUpstreams",
+          "fieldType": "URN",
+          "hasValuesFieldName": "hasFineGrainedUpstreams",
+          "queryByDefault": false
+        }
+      },
       "type": [
         "null",
         {
datahub/metadata/schemas/MetadataChangeEvent.avsc CHANGED

@@ -3070,6 +3070,14 @@
       "doc": "The type of upstream entity"
     },
     {
+      "Searchable": {
+        "/*": {
+          "fieldName": "fineGrainedUpstreams",
+          "fieldType": "URN",
+          "hasValuesFieldName": "hasFineGrainedUpstreams",
+          "queryByDefault": false
+        }
+      },
       "type": [
         "null",
         {
@@ -3691,6 +3699,7 @@
       "Searchable": {
         "fieldName": "upstreams",
         "fieldType": "URN",
+        "hasValuesFieldName": "hasUpstreams",
         "queryByDefault": false
       },
       "java": {
datahub/metadata/schemas/UpstreamLineage.avsc CHANGED

@@ -94,6 +94,7 @@
       "Searchable": {
         "fieldName": "upstreams",
         "fieldType": "URN",
+        "hasValuesFieldName": "hasUpstreams",
         "queryByDefault": false
       },
       "java": {
@@ -199,6 +200,14 @@
       "doc": "The type of upstream entity"
     },
     {
+      "Searchable": {
+        "/*": {
+          "fieldName": "fineGrainedUpstreams",
+          "fieldType": "URN",
+          "hasValuesFieldName": "hasFineGrainedUpstreams",
+          "queryByDefault": false
+        }
+      },
       "type": [
         "null",
         {
datahub/sdk/dataset.py CHANGED

@@ -72,6 +72,11 @@ UpstreamLineageInputType: TypeAlias = Union[
     Dict[DatasetUrnOrStr, ColumnLineageMapping],
 ]
 
+ViewDefinitionInputType: TypeAlias = Union[
+    str,
+    models.ViewPropertiesClass,
+]
+
 
 def _parse_upstream_input(
     upstream_input: UpstreamInputType,
@@ -467,6 +472,7 @@ class Dataset(
         custom_properties: Optional[Dict[str, str]] = None,
         created: Optional[datetime] = None,
         last_modified: Optional[datetime] = None,
+        view_definition: Optional[ViewDefinitionInputType] = None,
         # Standard aspects.
         parent_container: ParentContainerInputType | Unset = unset,
         subtype: Optional[str] = None,
@@ -495,6 +501,7 @@ class Dataset(
             custom_properties: Optional dictionary of custom properties.
             created: Optional creation timestamp.
             last_modified: Optional last modification timestamp.
+            view_definition: Optional view definition for the dataset.
             parent_container: Optional parent container for this dataset.
             subtype: Optional subtype of the dataset.
             owners: Optional list of owners.
@@ -536,6 +543,8 @@ class Dataset(
             self.set_created(created)
         if last_modified is not None:
             self.set_last_modified(last_modified)
+        if view_definition is not None:
+            self.set_view_definition(view_definition)
 
         if parent_container is not unset:
             self._set_container(parent_container)
@@ -717,6 +726,41 @@ class Dataset(
     def set_last_modified(self, last_modified: datetime) -> None:
         self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
 
+    @property
+    def view_definition(self) -> Optional[models.ViewPropertiesClass]:
+        """Get the view definition of the dataset.
+
+        Under typical usage, this will be present if the subtype is "View".
+
+        Returns:
+            The view definition if set, None otherwise.
+        """
+        return self._get_aspect(models.ViewPropertiesClass)
+
+    def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
+        """Set the view definition of the dataset.
+
+        If you're setting a view definition, subtype should typically be set to "view".
+
+        If a string is provided, it will be treated as a SQL view definition. To set
+        a custom language or other properties, provide a ViewPropertiesClass object.
+
+        Args:
+            view_definition: The view definition to set.
+        """
+        if isinstance(view_definition, models.ViewPropertiesClass):
+            self._set_aspect(view_definition)
+        elif isinstance(view_definition, str):
+            self._set_aspect(
+                models.ViewPropertiesClass(
+                    materialized=False,
+                    viewLogic=view_definition,
+                    viewLanguage="SQL",
+                )
+            )
+        else:
+            assert_never(view_definition)
+
     def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
         schema_metadata = self._get_aspect(models.SchemaMetadataClass)
         if schema_metadata is None: