acryl-datahub 1.2.0.3rc1__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/METADATA +2535 -2535
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/RECORD +38 -38
- datahub/_version.py +1 -1
- datahub/api/entities/external/external_tag.py +6 -4
- datahub/api/entities/external/lake_formation_external_entites.py +50 -49
- datahub/api/entities/external/restricted_text.py +107 -182
- datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
- datahub/emitter/rest_emitter.py +18 -5
- datahub/ingestion/api/source.py +81 -7
- datahub/ingestion/autogenerated/capability_summary.json +47 -19
- datahub/ingestion/graph/client.py +19 -3
- datahub/ingestion/sink/datahub_rest.py +2 -0
- datahub/ingestion/source/abs/source.py +9 -0
- datahub/ingestion/source/aws/glue.py +18 -2
- datahub/ingestion/source/aws/tag_entities.py +2 -2
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/dbt/dbt_common.py +10 -0
- datahub/ingestion/source/delta_lake/source.py +8 -1
- datahub/ingestion/source/dremio/dremio_source.py +19 -2
- datahub/ingestion/source/fivetran/fivetran.py +9 -3
- datahub/ingestion/source/ge_data_profiler.py +8 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
- datahub/ingestion/source/powerbi/powerbi.py +4 -1
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/salesforce.py +8 -0
- datahub/ingestion/source/sql/athena_properties_extractor.py +2 -2
- datahub/ingestion/source/sql/hive_metastore.py +8 -0
- datahub/ingestion/source/sql/teradata.py +8 -1
- datahub/ingestion/source/sql/trino.py +9 -0
- datahub/ingestion/source/unity/tag_entities.py +3 -3
- datahub/sdk/entity_client.py +22 -7
- datahub/utilities/mapping.py +29 -2
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/looker/looker_liquid_tag.py
CHANGED
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from typing import ClassVar, Optional, TextIO
+from typing import ClassVar, Optional, TextIO, Type
 
 from liquid import Environment
 from liquid.ast import Node
@@ -20,16 +20,27 @@ class CustomTagException(Exception):
 class ConditionNode(Node):
     def __init__(self, tok: Token, sql_or_lookml_reference: str, filter_name: str):
         self.tok = tok
-
         self.sql_or_lookml_reference = sql_or_lookml_reference
-
         self.filter_name = filter_name
 
     def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
         # This implementation will make sure that sql parse work correctly if looker condition tag
         # is used in lookml sql field
         buffer.write(f"{self.sql_or_lookml_reference}='dummy_value'")
+        return True
 
+
+class IncrementConditionNode(Node):
+    def __init__(self, tok: Token, sql_or_lookml_reference: str):
+        self.tok = tok
+        self.sql_or_lookml_reference = sql_or_lookml_reference
+
+    def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
+        # For incrementcondition, we need to generate a condition that would be used
+        # in incremental PDT updates. This typically involves date/time comparisons.
+        # We'll render it as a date comparison with a placeholder value
+        # See details in Looker documentation for incrementcondition tag -> cloud.google.com/looker/docs/reference/param-view-increment-key
+        buffer.write(f"{self.sql_or_lookml_reference} > '2023-01-01'")
         return True
 
 
@@ -44,7 +55,6 @@ class ConditionTag(Tag):
     This class render the below tag as order.region='ap-south-1' if order_region is provided in config.liquid_variables
     as order_region: 'ap-south-1'
     {% condition order_region %} order.region {% endcondition %}
-
     """
 
     TAG_START: ClassVar[str] = "condition"
@@ -79,7 +89,48 @@ class ConditionTag(Tag):
         )
 
 
-
+class IncrementConditionTag(Tag):
+    """
+    IncrementConditionTag is the equivalent implementation of looker's custom liquid tag "incrementcondition".
+    Refer doc: https://cloud.google.com/looker/docs/incremental-pdts#using_the_incrementcondition_tag
+
+    This tag is used for incremental PDTs to determine which records should be updated.
+    It typically works with date/time fields to filter data that has changed since the last update.
+
+    Example usage in Looker:
+    {% incrementcondition created_at %} order.created_at {% endincrementcondition %}
+
+    This would generate SQL like: order.created_at > '2023-01-01 00:00:00'
+    """
+
+    TAG_START: ClassVar[str] = "incrementcondition"
+    TAG_END: ClassVar[str] = "endincrementcondition"
+    name: str = "incrementcondition"
+
+    def __init__(self, env: Environment):
+        super().__init__(env)
+        self.parser = get_parser(self.env)
+
+    def parse(self, stream: TokenStream) -> Node:
+        expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_START)
+
+        start_token = stream.current
+
+        stream.next_token()
+        expect(stream, TOKEN_LITERAL)
+        sql_or_lookml_reference: str = stream.current.value.strip()
+
+        stream.next_token()
+        expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_END)
+
+        return IncrementConditionNode(
+            tok=start_token,
+            sql_or_lookml_reference=sql_or_lookml_reference,
+        )
+
+
+# Updated custom_tags list to include both tags
+custom_tags: list[Type[Tag]] = [ConditionTag, IncrementConditionTag]
 
 
 @string_filter
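Side note (not part of the diff): a minimal sketch of the substitution these render_to_output implementations perform when DataHub flattens Looker liquid so the underlying SQL can be parsed. The LookML snippet and the resulting string below are illustrative assumptions, not output copied from the package.

# Illustrative only: how the condition / incrementcondition tags are rewritten
# before SQL parsing, per the render_to_output implementations above.
lookml_sql = (
    "SELECT * FROM orders WHERE "
    "{% incrementcondition created_at %} orders.created_at {% endincrementcondition %}"
)

# IncrementConditionNode writes "<reference> > '2023-01-01'", so the flattened SQL
# handed to the parser is roughly:
flattened_sql = "SELECT * FROM orders WHERE orders.created_at > '2023-01-01'"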
datahub/ingestion/source/mock_data/datahub_mock_data.py
CHANGED
@@ -13,7 +13,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.source import Source, SourceReport, StructuredLogCategory
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
 from datahub.ingestion.source.mock_data.datahub_mock_data_report import (
@@ -35,6 +35,8 @@ from datahub.utilities.str_enum import StrEnum
 
 logger = logging.getLogger(__name__)
 
+PLATFORM_NAME = "fake"
+
 
 class SubTypePattern(StrEnum):
     ALTERNATING = "alternating"
@@ -137,6 +139,10 @@ class DataHubMockDataConfig(ConfigModel):
         default=0,
         description="Number of warnings to add in report for testing",
     )
+    num_info: int = Field(
+        default=0,
+        description="Number of info to add in report for testing",
+    )
 
     gen_1: LineageConfigGen1 = Field(
         default_factory=LineageConfigGen1,
@@ -144,7 +150,7 @@ class DataHubMockDataConfig(ConfigModel):
     )
 
 
-@platform_name(
+@platform_name(PLATFORM_NAME)
 @config_class(DataHubMockDataConfig)
 @support_status(SupportStatus.TESTING)
 class DataHubMockDataSource(Source):
@@ -159,6 +165,9 @@ class DataHubMockDataSource(Source):
         self.report = DataHubMockDataReport()
 
     def get_workunits(self) -> Iterable[MetadataWorkUnit]:
+        # We don't want any implicit aspects to be produced
+        # so we are not using get_workunits_internal
+
         if self.config.throw_uncaught_exceptions:
             raise Exception("This is a test exception")
 
@@ -176,10 +185,17 @@ class DataHubMockDataSource(Source):
                     message="This is test warning",
                     title="Test Warning",
                     context=f"This is test warning {i}",
+                    log_category=StructuredLogCategory.LINEAGE,
+                )
+
+        if self.config.num_info > 0:
+            for i in range(self.config.num_info):
+                self.report.info(
+                    message="This is test info",
+                    title="Test Info",
+                    context=f"This is test info {i}",
                 )
 
-        # We don't want any implicit aspects to be produced
-        # so we are not using get_workunits_internal
         if self.config.gen_1.enabled:
             for wu in self._data_gen_1():
                 if self.report.first_urn_seen is None:
@@ -309,7 +325,7 @@ class DataHubMockDataSource(Source):
             table_level, table_index, subtype_pattern, subtype_types, level_subtypes
         )
 
-        urn = make_dataset_urn(platform=
+        urn = make_dataset_urn(platform=PLATFORM_NAME, name=table_name)
         mcp = MetadataChangeProposalWrapper(
             entityUrn=urn,
             entityType="dataset",
@@ -433,7 +449,7 @@ class DataHubMockDataSource(Source):
 
     def _get_status_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
@@ -448,7 +464,7 @@ class DataHubMockDataSource(Source):
     ) -> MetadataWorkUnit:
         mcp = MetadataChangeProposalWrapper(
             entityUrn=make_dataset_urn(
-                platform=
+                platform=PLATFORM_NAME,
                 name=downstream_table,
             ),
             entityType="dataset",
@@ -456,7 +472,7 @@ class DataHubMockDataSource(Source):
                 upstreams=[
                     UpstreamClass(
                         dataset=make_dataset_urn(
-                            platform=
+                            platform=PLATFORM_NAME,
                             name=upstream_table,
                         ),
                         type=DatasetLineageTypeClass.TRANSFORMED,
@@ -468,7 +484,7 @@ class DataHubMockDataSource(Source):
 
     def _get_profile_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
@@ -485,7 +501,7 @@ class DataHubMockDataSource(Source):
 
     def _get_usage_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
datahub/ingestion/source/powerbi/powerbi.py
CHANGED
@@ -1226,7 +1226,10 @@ class Mapper:
 @platform_name("PowerBI")
 @config_class(PowerBiDashboardSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+)
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
 @capability(SourceCapability.OWNERSHIP, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@@ -132,6 +132,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Enabled by default",
     subtype_modifier=[
         SourceCapabilityModifier.DATABASE,
+        SourceCapabilityModifier.SCHEMA,
     ],
 )
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
datahub/ingestion/source/salesforce.py
CHANGED
@@ -549,6 +549,14 @@ class SalesforceApi:
     capability_name=SourceCapability.TAGS,
     description="Enabled by default",
 )
+@capability(
+    capability_name=SourceCapability.LINEAGE_COARSE,
+    description="Extract table-level lineage for Salesforce objects",
+    subtype_modifier=[
+        SourceCapabilityModifier.SALESFORCE_CUSTOM_OBJECT,
+        SourceCapabilityModifier.SALESFORCE_STANDARD_OBJECT,
+    ],
+)
 class SalesforceSource(StatefulIngestionSourceBase):
     def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
         super().__init__(config, ctx)
datahub/ingestion/source/sql/athena_properties_extractor.py
CHANGED
@@ -99,10 +99,10 @@ class AthenaPropertiesExtractor:
     """A class to extract properties from Athena CREATE TABLE statements."""
 
     CREATE_TABLE_REGEXP = re.compile(
-        "(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
+        r"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
     )
     PARTITIONED_BY_REGEXP = re.compile(
-        "(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
+        r"(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
         re.MULTILINE | re.IGNORECASE,
     )
 
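Side note (not part of the diff): the change above only adds raw-string prefixes; without them, escapes like \s and \( inside a plain string trigger invalid-escape warnings on newer Python versions. A small self-contained sketch of the corrected pattern in use follows; the sample DDL string is made up for illustration.

import re

# Same pattern as in the diff, compiled as a raw string.
CREATE_TABLE_REGEXP = re.compile(
    r"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
)

ddl = "CREATE TABLE my_db.events (id string)"
match = CREATE_TABLE_REGEXP.search(ddl)
# group(2) captures the table identifier between "CREATE TABLE" and the column list.
assert match is not None and match.group(2) == "my_db.events"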
datahub/ingestion/source/sql/hive_metastore.py
CHANGED
@@ -27,6 +27,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import (
     DatasetContainerSubTypes,
     DatasetSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
@@ -168,6 +169,13 @@ class HiveMetastore(BasicSQLAlchemyConfig):
 @capability(
     SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
 )
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.CATALOG,
+    ],
+)
 class HiveMetastoreSource(SQLAlchemySource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sql/teradata.py
CHANGED
@@ -42,6 +42,7 @@ from datahub.ingestion.api.decorators import (
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.graph.client import DataHubGraph
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.sql.sql_common import register_custom_type
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
 from datahub.ingestion.source.sql.sql_report import SQLSourceReport
@@ -539,7 +540,13 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig):
 @config_class(TeradataConfig)
 @support_status(SupportStatus.TESTING)
 @capability(SourceCapability.DOMAINS, "Enabled by default")
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+    ],
+)
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(
     SourceCapability.DELETION_DETECTION,
datahub/ingestion/source/sql/trino.py
CHANGED
@@ -36,6 +36,7 @@ from datahub.ingestion.api.decorators import (
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.extractor import schema_util
 from datahub.ingestion.source.common.data_reader import DataReader
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
     SqlWorkUnit,
@@ -249,6 +250,14 @@ class TrinoConfig(BasicSQLAlchemyConfig):
 @support_status(SupportStatus.CERTIFIED)
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Extract table-level lineage",
+    subtype_modifier=[
+        SourceCapabilityModifier.TABLE,
+        SourceCapabilityModifier.VIEW,
+    ],
+)
 class TrinoSource(SQLAlchemySource):
     """
 
datahub/ingestion/source/unity/tag_entities.py
CHANGED
@@ -77,13 +77,13 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
         )
         if existing_platform_resource:
             logger.info(
-                f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.
+                f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.raw_text}: {existing_platform_resource}"
             )
             return existing_platform_resource
 
         return UnityCatalogTagPlatformResourceId(
-            tag_key=tag.key.
-            tag_value=tag.value.
+            tag_key=tag.key.raw_text,
+            tag_value=tag.value.raw_text if tag.value is not None else None,
             platform_instance=platform_instance,
             exists_in_unity_catalog=exists_in_unity_catalog,
             persisted=False,
datahub/sdk/entity_client.py
CHANGED
@@ -1,11 +1,12 @@
 from __future__ import annotations
 
 import warnings
-from typing import TYPE_CHECKING, Union, overload
+from typing import TYPE_CHECKING, Optional, Union, overload
 
 import datahub.metadata.schema_classes as models
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
+from datahub.emitter.rest_emitter import EmitMode
 from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.urns import (
@@ -133,7 +134,7 @@ class EntityClient:
 
         return entity
 
-    def create(self, entity: Entity) -> None:
+    def create(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
         mcps = []
 
         if self._graph.exists(str(entity.urn)):
@@ -152,9 +153,12 @@ class EntityClient:
             )
         mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))
 
-
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)
 
-    def upsert(self, entity: Entity) -> None:
+    def upsert(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
         if entity._prev_aspects is None and self._graph.exists(str(entity.urn)):
             warnings.warn(
                 f"The entity {entity.urn} already exists. This operation will partially overwrite the existing entity.",
@@ -164,9 +168,17 @@ class EntityClient:
         # TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?
 
         mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
-
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)
 
-    def update(
+    def update(
+        self,
+        entity: Union[Entity, MetadataPatchProposal],
+        *,
+        emit_mode: Optional[EmitMode] = None,
+    ) -> None:
         if isinstance(entity, MetadataPatchProposal):
             return self._update_patch(entity)
 
@@ -179,7 +191,10 @@ class EntityClient:
         # -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"
 
         mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
-
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)
 
     def _update_patch(
         self, updater: MetadataPatchProposal, check_exists: bool = True
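Side note (not part of the diff): a minimal, hedged usage sketch of the new keyword-only emit_mode argument on EntityClient.create/upsert/update. It assumes the high-level DataHubClient SDK entry point and the SDK Dataset builder; the server URL, dataset details, and the EmitMode member chosen are placeholders, and when emit_mode is omitted the previous default emit behavior of DataHubGraph.emit_mcps is kept.

from datahub.emitter.rest_emitter import EmitMode
from datahub.sdk import DataHubClient, Dataset

# Placeholder connection details; point this at a real DataHub instance.
client = DataHubClient(server="http://localhost:8080")

dataset = Dataset(
    platform="hive",
    name="my_db.my_table",
    description="Created via the SDK",
)

# Placeholder: pick an explicit EmitMode member appropriate for your deployment.
chosen_mode = list(EmitMode)[0]
client.entities.upsert(dataset, emit_mode=chosen_mode)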
datahub/utilities/mapping.py
CHANGED
@@ -83,7 +83,7 @@ class Constants:
     MATCH = "match"
     USER_OWNER = "user"
     GROUP_OWNER = "group"
-    OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float]
+    OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float, list]
     TAG_PARTITION_KEY = "PARTITION_KEY"
     TAG_DIST_KEY = "DIST_KEY"
     TAG_SORT_KEY = "SORT_KEY"
@@ -455,7 +455,34 @@ class OperationProcessor:
         # function to check if a match clause is satisfied to a value.
         if not any(
             isinstance(raw_props_value, t) for t in Constants.OPERAND_DATATYPE_SUPPORTED
-        )
+        ):
+            return None
+
+        # Handle list values by checking if any item in the list matches
+        if isinstance(raw_props_value, list):
+            # For lists, we need to find at least one matching item
+            # Return a match with the concatenated values of all matching items
+            matching_items = []
+            for item in raw_props_value:
+                if isinstance(item, str):
+                    match = re.match(match_clause, item)
+                    if match:
+                        matching_items.append(item)
+                elif isinstance(match_clause, type(item)):
+                    match = re.match(str(match_clause), str(item))
+                    if match:
+                        matching_items.append(str(item))
+
+            if matching_items:
+                # Create a synthetic match object with all matching items joined
+                combined_value = ",".join(matching_items)
+                return re.match(
+                    ".*", combined_value
+                )  # Always matches, returns combined value
+            return None
+
+        # Handle scalar values (existing logic)
+        elif not isinstance(raw_props_value, type(match_clause)):
             return None
         elif isinstance(raw_props_value, str):
             return re.match(match_clause, raw_props_value)
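Side note (not part of the diff): a standalone worked example of the list handling added above, with made-up sample data. Every string item that matches the clause is collected, the matches are joined with commas, and a trivially matching re.Match over the combined string is returned so downstream code can read the combined value from group(0).

import re

raw_props_value = ["pii", "pii.email", "public"]
match_clause = "pii.*"

matching_items = [
    item
    for item in raw_props_value
    if isinstance(item, str) and re.match(match_clause, item)
]
combined = ",".join(matching_items)        # -> "pii,pii.email"
synthetic_match = re.match(".*", combined)  # always matches; carries the combined value
assert synthetic_match is not None and synthetic_match.group(0) == "pii,pii.email"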