PyPI - acryl-datahub - Versions diffs - 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl - Mend

acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic. Click here for more details.

Files changed (32) hide show

datahub/api/entities/external/restricted_text.py CHANGED Viewed

@@ -11,41 +11,12 @@ Features:
 from __future__ import annotations
-from typing import Any, ClassVar, Optional, Set, Union
+from typing import ClassVar, Optional, Set
-# Check Pydantic version and import accordingly
-try:
-    from pydantic import VERSION
+from datahub.configuration.common import ConfigModel
-    PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2
-except (ImportError, AttributeError):
-    # Fallback for older versions that don't have VERSION
-    PYDANTIC_V2 = False
-if PYDANTIC_V2:
-    from pydantic import GetCoreSchemaHandler  # type: ignore[attr-defined]
-    from pydantic_core import core_schema
-else:
-    from pydantic.validators import str_validator
-class RestrictedTextConfig:
-    """Configuration class for RestrictedText."""
-    def __init__(
-        self,
-        max_length: Optional[int] = None,
-        forbidden_chars: Optional[Set[str]] = None,
-        replacement_char: Optional[str] = None,
-        truncation_suffix: Optional[str] = None,
-    ):
-        self.max_length = max_length
-        self.forbidden_chars = forbidden_chars
-        self.replacement_char = replacement_char
-        self.truncation_suffix = truncation_suffix
-class RestrictedText(str):
+class RestrictedText(ConfigModel):
     """A string type that stores the original value but returns a truncated and sanitized version.
     This type allows you to:
@@ -60,8 +31,9 @@ class RestrictedText(str):
         # Basic usage with default settings
         name: RestrictedText
-        # Custom max length and character replacement using Field
-        custom_field: RestrictedText = RestrictedText.with_config(
+        # Custom max length and character replacement
+        custom_field: RestrictedText = RestrictedText(
+            text="hello-world.test",
             max_length=10,
             forbidden_chars={' ', '-', '.'},
             replacement_char='_'
@@ -73,175 +45,128 @@ class RestrictedText(str):
         custom_field="hello-world.test"
     )
-    print(model.name)  # Truncated and sanitized version
-    print(model.name.original)  # Original value
-    print(model.custom_field)  # "hello_worl..."
+    # model.name returns truncated and sanitized version
+    # model.name.raw_text returns original value
+    # model.custom_field returns "hello_worl..."
     ```
     """
     # Default configuration
-    _default_max_length: ClassVar[Optional[int]] = 50
-    _default_forbidden_chars: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
-    _default_replacement_char: ClassVar[str] = "_"
-    _default_truncation_suffix: ClassVar[str] = "..."
-    def __new__(cls, value: str = "") -> "RestrictedText":
-        """Create a new string instance."""
-        instance = str.__new__(cls, "")  # We'll set the display value later
-        return instance
-    def __init__(self, value: str = ""):
-        """Initialize the RestrictedText with a value."""
-        self.original: str = value
-        self.max_length = self._default_max_length
-        self.forbidden_chars = self._default_forbidden_chars
-        self.replacement_char = self._default_replacement_char
-        self.truncation_suffix = self._default_truncation_suffix
-        # Process the value
-        self._processed_value = self._process_value(value)
-    def _configure(
+    DEFAULT_MAX_LENGTH: ClassVar[Optional[int]] = 50
+    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
+    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
+    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
+    raw_text: str
+    max_length: Optional[int] = None
+    forbidden_chars: Optional[Set[str]] = None
+    replacement_char: Optional[str] = None
+    truncation_suffix: Optional[str] = None
+    _processed_value: Optional[str] = None
+    def __init__(self, **data):
+        super().__init__(**data)
+        self.validate_text()
+    @classmethod
+    def __get_validators__(cls):
+        yield cls.pydantic_accept_raw_text
+        yield cls.validate
+        yield cls.pydantic_validate_text
+    @classmethod
+    def pydantic_accept_raw_text(cls, v):
+        if isinstance(v, (RestrictedText, dict)):
+            return v
+        assert isinstance(v, str), "text must be a string"
+        return {"text": v}
+    @classmethod
+    def pydantic_validate_text(cls, v):
+        assert isinstance(v, RestrictedText)
+        assert v.validate_text()
+        return v
+    @classmethod
+    def validate(cls, v):
+        """Validate and create a RestrictedText instance."""
+        if isinstance(v, RestrictedText):
+            return v
+        # This should be a dict at this point from pydantic_accept_raw_text
+        if isinstance(v, dict):
+            instance = cls(**v)
+            instance.validate_text()
+            return instance
+        raise ValueError(f"Unable to validate RestrictedText from {type(v)}")
+    def validate_text(self) -> bool:
+        """Validate the text and apply restrictions."""
+        # Set defaults if not provided
+        max_length = (
+            self.max_length if self.max_length is not None else self.DEFAULT_MAX_LENGTH
+        )
+        forbidden_chars = (
+            self.forbidden_chars
+            if self.forbidden_chars is not None
+            else self.DEFAULT_FORBIDDEN_CHARS
+        )
+        replacement_char = (
+            self.replacement_char
+            if self.replacement_char is not None
+            else self.DEFAULT_REPLACEMENT_CHAR
+        )
+        truncation_suffix = (
+            self.truncation_suffix
+            if self.truncation_suffix is not None
+            else self.DEFAULT_TRUNCATION_SUFFIX
+        )
+        # Store processed value
+        self._processed_value = self._process_value(
+            self.raw_text,
+            max_length,
+            forbidden_chars,
+            replacement_char,
+            truncation_suffix,
+        )
+        return True
+    def _process_value(
         self,
-        max_length: Optional[int] = None,
-        forbidden_chars: Optional[Set[str]] = None,
-        replacement_char: Optional[str] = None,
-        truncation_suffix: Optional[str] = None,
-    ) -> "RestrictedText":
-        """Configure this instance with custom settings."""
-        if max_length is not None:
-            self.max_length = max_length
-        if forbidden_chars is not None:
-            self.forbidden_chars = forbidden_chars
-        if replacement_char is not None:
-            self.replacement_char = replacement_char
-        if truncation_suffix is not None:
-            self.truncation_suffix = truncation_suffix
-        # Reprocess the value with new configuration
-        self._processed_value = self._process_value(self.original)
-        return self
-    def _process_value(self, value: str) -> str:
+        value: str,
+        max_length: Optional[int],
+        forbidden_chars: Set[str],
+        replacement_char: str,
+        truncation_suffix: str,
+    ) -> str:
         """Process the value by replacing characters and truncating."""
         # Replace specified characters
         processed = value
-        for char in self.forbidden_chars:
-            processed = processed.replace(char, self.replacement_char)
+        for char in forbidden_chars:
+            processed = processed.replace(char, replacement_char)
         # Truncate if necessary
-        if self.max_length is not None and len(processed) > self.max_length:
-            if len(self.truncation_suffix) >= self.max_length:
+        if max_length is not None and len(processed) > max_length:
+            if len(truncation_suffix) >= max_length:
                 # If suffix is too long, just truncate without suffix
-                processed = processed[: self.max_length]
+                processed = processed[:max_length]
             else:
                 # Truncate and add suffix
-                truncate_length = self.max_length - len(self.truncation_suffix)
-                processed = processed[:truncate_length] + self.truncation_suffix
+                truncate_length = max_length - len(truncation_suffix)
+                processed = processed[:truncate_length] + truncation_suffix
         return processed
     def __str__(self) -> str:
         """Return the processed (truncated and sanitized) value."""
-        return self._processed_value
+        return self._processed_value or ""
     def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self._processed_value!r})"
+        return f"{self.__class__.__name__}({self.raw_text!r})"
     @property
     def processed(self) -> str:
         """Get the processed (truncated and sanitized) value."""
-        return self._processed_value
-    @classmethod
-    def with_config(
-        cls,
-        max_length: Optional[int] = None,
-        forbidden_chars: Optional[Set[str]] = None,
-        replacement_char: Optional[str] = None,
-        truncation_suffix: Optional[str] = None,
-    ) -> RestrictedTextConfig:
-        """Create a configuration object for use as field default.
-        Args:
-            max_length: Maximum length of the processed string
-            forbidden_chars: Set of characters to replace
-            replacement_char: Character to use as replacement
-            truncation_suffix: Suffix to add when truncating
-        Returns:
-            A configuration object that can be used as field default
-        """
-        return RestrictedTextConfig(
-            max_length=max_length,
-            forbidden_chars=forbidden_chars,
-            replacement_char=replacement_char,
-            truncation_suffix=truncation_suffix,
-        )
-    # Pydantic v2 methods
-    if PYDANTIC_V2:
-        @classmethod
-        def _validate(
-            cls,
-            __input_value: Union[str, "RestrictedText"],
-            _: core_schema.ValidationInfo,
-        ) -> "RestrictedText":
-            """Validate and create a RestrictedText instance."""
-            if isinstance(__input_value, RestrictedText):
-                return __input_value
-            return cls(__input_value)
-        @classmethod
-        def __get_pydantic_core_schema__(
-            cls, source: type[Any], handler: GetCoreSchemaHandler
-        ) -> core_schema.CoreSchema:
-            """Get the Pydantic core schema for this type."""
-            return core_schema.with_info_after_validator_function(
-                cls._validate,
-                core_schema.str_schema(),
-                field_name=cls.__name__,
-            )
-    # Pydantic v1 methods
-    else:
-        @classmethod
-        def __get_validators__(cls):
-            """Pydantic v1 validator method."""
-            yield cls.validate
-        @classmethod
-        def validate(cls, v, field=None):
-            """Validate and create a RestrictedText instance for Pydantic v1."""
-            if isinstance(v, RestrictedText):
-                return v
-            if not isinstance(v, str):
-                # Let pydantic handle the string validation
-                v = str_validator(v)
-            # Create instance
-            instance = cls(v)
-            # Check if there's a field default that contains configuration
-            if (
-                field
-                and hasattr(field, "default")
-                and isinstance(field.default, RestrictedTextConfig)
-            ):
-                config = field.default
-                instance._configure(
-                    max_length=config.max_length,
-                    forbidden_chars=config.forbidden_chars,
-                    replacement_char=config.replacement_char,
-                    truncation_suffix=config.truncation_suffix,
-                )
-            return instance
-        @classmethod
-        def __modify_schema__(cls, field_schema):
-            """Modify the JSON schema for Pydantic v1."""
-            field_schema.update(type="string", examples=["example string"])
+        return self._processed_value or ""

datahub/api/entities/external/unity_catalog_external_entites.py CHANGED Viewed

@@ -10,8 +10,10 @@
 # Tag search using the workspace search UI is supported only for tables, views, and table columns.
 # Tag search requires exact term matching.
 # https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
-from typing import Any, Dict, Optional, Set, Union
+from typing import Any, Dict, Optional, Set
+# Import validator for Pydantic v1 (always needed since we removed conditional logic)
+from pydantic import validator
 from typing_extensions import ClassVar
 from datahub.api.entities.external.external_tag import ExternalTag
@@ -21,9 +23,9 @@ from datahub.api.entities.external.restricted_text import RestrictedText
 class UnityCatalogTagKeyText(RestrictedText):
     """RestrictedText configured for Unity Catalog tag keys."""
-    _default_max_length: ClassVar[int] = 255
-    # Unity Catalog tag keys: alphanumeric, hyphens, underscores, periods only
-    _default_forbidden_chars: ClassVar[Set[str]] = {
+    DEFAULT_MAX_LENGTH: ClassVar[int] = 255
+    # Unity Catalog tag keys: forbidden characters based on constraints
+    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {
         "\t",
         "\n",
         "\r",
@@ -34,18 +36,18 @@ class UnityCatalogTagKeyText(RestrictedText):
         "/",
         ":",
     }
-    _default_replacement_char: ClassVar[str] = "_"
-    _default_truncation_suffix: ClassVar[str] = ""  # No suffix for clean identifiers
+    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
+    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = ""  # No suffix for clean identifiers
 class UnityCatalogTagValueText(RestrictedText):
     """RestrictedText configured for Unity Catalog tag values."""
-    _default_max_length: ClassVar[int] = 1000
+    DEFAULT_MAX_LENGTH: ClassVar[int] = 1000
     # Unity Catalog tag values are more permissive but still have some restrictions
-    _default_forbidden_chars: ClassVar[Set[str]] = {"\t", "\n", "\r"}
-    _default_replacement_char: ClassVar[str] = " "
-    _default_truncation_suffix: ClassVar[str] = "..."
+    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {"\t", "\n", "\r"}
+    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
+    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
 class UnityCatalogTag(ExternalTag):
@@ -60,46 +62,43 @@ class UnityCatalogTag(ExternalTag):
     key: UnityCatalogTagKeyText
     value: Optional[UnityCatalogTagValueText] = None
-    def __init__(
-        self,
-        key: Optional[Union[str, UnityCatalogTagKeyText]] = None,
-        value: Optional[Union[str, UnityCatalogTagValueText]] = None,
-        **data: Any,
-    ) -> None:
-        """
-        Initialize UnityCatalogTag from either a DataHub Tag URN or explicit key/value.
+    # Pydantic v1 validators
+    @validator("key", pre=True)
+    @classmethod
+    def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
+        """Validate and convert key field for Pydantic v1."""
+        if isinstance(v, UnityCatalogTagKeyText):
+            return v
-        Args:
-            key: Explicit key value (optional for Pydantic initialization)
-            value: Explicit value (optional)
-            **data: Additional Pydantic data
-        """
-        if key is not None:
-            # Direct initialization with key/value
-            processed_key = (
-                UnityCatalogTagKeyText(key)
-                if not isinstance(key, UnityCatalogTagKeyText)
-                else key
-            )
-            processed_value = None
-            if value is not None:
-                processed_value = (
-                    UnityCatalogTagValueText(value)
-                    if not isinstance(value, UnityCatalogTagValueText)
-                    else value
-                )
-            # If value is an empty string, set it to None to not generater empty value in DataHub tag which results in key: tags
-            if not str(value):
-                processed_value = None
-            super().__init__(
-                key=processed_key,
-                value=processed_value,
-                **data,
-            )
-        else:
-            # Standard pydantic initialization
-            super().__init__(**data)
+        # If we get a RestrictedText object from parent class validation, use its raw_text value
+        if hasattr(v, "raw_text"):
+            return UnityCatalogTagKeyText(raw_text=v.raw_text)
+        return UnityCatalogTagKeyText(raw_text=v)
+    @validator("value", pre=True)
+    @classmethod
+    def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
+        """Validate and convert value field for Pydantic v1."""
+        if v is None:
+            return None
+        if isinstance(v, UnityCatalogTagValueText):
+            return v
+        # If we get a RestrictedText object from parent class validation, use its raw_text value
+        if hasattr(v, "raw_text"):
+            text_value = v.raw_text
+            # If value is an empty string, set it to None to not generate empty value in DataHub tag
+            if not str(text_value):
+                return None
+            return UnityCatalogTagValueText(raw_text=text_value)
+        # If value is an empty string, set it to None to not generate empty value in DataHub tag
+        if not str(v):
+            return None
+        return UnityCatalogTagValueText(raw_text=v)
     def __eq__(self, other: object) -> bool:
         """Check equality based on key and value."""
@@ -124,7 +123,7 @@ class UnityCatalogTag(ExternalTag):
         Returns:
             UnityCatalogTag instance
         """
-        return cls(key=tag_dict["key"], value=tag_dict.get("value"))
+        return cls(**tag_dict)
     @classmethod
     def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag":
@@ -149,9 +148,9 @@ class UnityCatalogTag(ExternalTag):
         Returns:
             Dictionary with 'key' and optionally 'value'
         """
-        result: Dict[str, str] = {"key": self.key.original}
+        result: Dict[str, str] = {"key": self.key.raw_text}
         if self.value is not None:
-            result["value"] = self.value.original
+            result["value"] = self.value.raw_text
         return result
     def to_display_dict(self) -> Dict[str, str]:

datahub/ingestion/api/source.py CHANGED Viewed

@@ -81,11 +81,24 @@ class StructuredLogLevel(Enum):
     ERROR = logging.ERROR
+class StructuredLogCategory(Enum):
+    """
+    This is used to categorise the errors mainly based on the biggest impact area
+    This is to be used to help in self-serve understand the impact of any log entry
+    More enums to be added as logs are updated to be self-serve
+    """
+    LINEAGE = "LINEAGE"
+    USAGE = "USAGE"
+    PROFILING = "PROFILING"
 @dataclass
 class StructuredLogEntry(Report):
     title: Optional[str]
     message: str
     context: LossyList[str]
+    log_category: Optional[StructuredLogCategory] = None
 @dataclass
@@ -108,9 +121,10 @@ class StructuredLogs(Report):
         exc: Optional[BaseException] = None,
         log: bool = False,
         stacklevel: int = 1,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
         """
-        Report a user-facing warning for the ingestion run.
+        Report a user-facing log for the ingestion run.
         Args:
             level: The level of the log entry.
@@ -118,6 +132,9 @@ class StructuredLogs(Report):
             title: The category / heading to present on for this message in the UI.
             context: Additional context (e.g. where, how) for the log entry.
             exc: The exception associated with the event. We'll show the stack trace when in debug mode.
+            log_category: The type of the log entry. This is used to categorise the log entry.
+            log: Whether to log the entry to the console.
+            stacklevel: The stack level to use for the log entry.
         """
         # One for this method, and one for the containing report_* call.
@@ -160,6 +177,7 @@ class StructuredLogs(Report):
                 title=title,
                 message=message,
                 context=context_list,
+                log_category=log_category,
             )
         else:
             if context is not None:
@@ -219,9 +237,19 @@ class SourceReport(ExamplesReport):
         context: Optional[str] = None,
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.WARN, message, title, context, exc, log=False
+            StructuredLogLevel.WARN,
+            message,
+            title,
+            context,
+            exc,
+            log=False,
+            log_category=log_category,
         )
     def warning(
@@ -231,9 +259,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.WARN, message, title, context, exc, log=log
+            StructuredLogLevel.WARN,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
     def report_failure(
@@ -243,9 +281,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.ERROR, message, title, context, exc, log=log
+            StructuredLogLevel.ERROR,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
     def failure(
@@ -255,9 +303,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.ERROR, message, title, context, exc, log=log
+            StructuredLogLevel.ERROR,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
     def info(
@@ -267,9 +325,19 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         exc: Optional[BaseException] = None,
         log: bool = True,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> None:
+        """
+        See docs of StructuredLogs.report_log for details of args
+        """
         self._structured_logs.report_log(
-            StructuredLogLevel.INFO, message, title, context, exc, log=log
+            StructuredLogLevel.INFO,
+            message,
+            title,
+            context,
+            exc,
+            log=log,
+            log_category=log_category,
         )
     @contextlib.contextmanager
@@ -279,6 +347,7 @@ class SourceReport(ExamplesReport):
         title: Optional[LiteralString] = None,
         context: Optional[str] = None,
         level: StructuredLogLevel = StructuredLogLevel.ERROR,
+        log_category: Optional[StructuredLogCategory] = None,
     ) -> Iterator[None]:
         # Convenience method that helps avoid boilerplate try/except blocks.
         # TODO: I'm not super happy with the naming here - it's not obvious that this
@@ -287,7 +356,12 @@ class SourceReport(ExamplesReport):
             yield
         except Exception as exc:
             self._structured_logs.report_log(
-                level, message=message, title=title, context=context, exc=exc
+                level,
+                message=message,
+                title=title,
+                context=context,
+                exc=exc,
+                log_category=log_category,
             )
     def __post_init__(self) -> None:

acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl

Potentially problematic release.

acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl