acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/METADATA +2665 -2664
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/RECORD +68 -67
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +3 -3
- datahub/api/entities/external/external_tag.py +6 -4
- datahub/api/entities/external/lake_formation_external_entites.py +50 -49
- datahub/api/entities/external/restricted_text.py +105 -180
- datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/quickstart_versioning.py +1 -1
- datahub/cli/specific/assertions_cli.py +37 -2
- datahub/cli/specific/datacontract_cli.py +54 -4
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +1 -1
- datahub/ingestion/api/report.py +21 -2
- datahub/ingestion/api/source.py +81 -7
- datahub/ingestion/autogenerated/capability_summary.json +47 -19
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +9 -0
- datahub/ingestion/source/aws/glue.py +18 -2
- datahub/ingestion/source/aws/tag_entities.py +4 -4
- datahub/ingestion/source/data_lake_common/path_spec.py +6 -3
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +6 -3
- datahub/ingestion/source/delta_lake/source.py +8 -1
- datahub/ingestion/source/dremio/dremio_source.py +19 -2
- datahub/ingestion/source/fivetran/fivetran.py +9 -3
- datahub/ingestion/source/fivetran/fivetran_log_api.py +4 -3
- datahub/ingestion/source/ge_data_profiler.py +8 -0
- datahub/ingestion/source/grafana/models.py +6 -0
- datahub/ingestion/source/hex/hex.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +4 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
- datahub/ingestion/source/powerbi/powerbi.py +4 -1
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/salesforce.py +8 -0
- datahub/ingestion/source/slack/slack.py +7 -14
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +4 -4
- datahub/ingestion/source/sql/hive_metastore.py +8 -0
- datahub/ingestion/source/sql/teradata.py +8 -1
- datahub/ingestion/source/sql/trino.py +9 -0
- datahub/ingestion/source/tableau/tableau.py +1 -1
- datahub/ingestion/source/unity/config.py +36 -1
- datahub/ingestion/source/unity/proxy.py +332 -46
- datahub/ingestion/source/unity/proxy_types.py +12 -2
- datahub/ingestion/source/unity/source.py +91 -34
- datahub/ingestion/source/unity/tag_entities.py +5 -5
- datahub/ingestion/source/usage/starburst_trino_usage.py +2 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/metadata/_internal_schema_classes.py +513 -513
- datahub/metadata/_urns/urn_defs.py +1684 -1684
- datahub/metadata/schema.avsc +16745 -16348
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/sdk/entity_client.py +22 -7
- datahub/sdk/search_client.py +3 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataset.py +37 -59
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/server_config_util.py +2 -1
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4.dist-info}/top_level.txt +0 -0
|
@@ -10,8 +10,9 @@
|
|
|
10
10
|
# Tag search using the workspace search UI is supported only for tables, views, and table columns.
|
|
11
11
|
# Tag search requires exact term matching.
|
|
12
12
|
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
|
|
13
|
-
from typing import Any, Dict, Optional
|
|
13
|
+
from typing import Any, Dict, Optional
|
|
14
14
|
|
|
15
|
+
from pydantic import validator
|
|
15
16
|
from typing_extensions import ClassVar
|
|
16
17
|
|
|
17
18
|
from datahub.api.entities.external.external_tag import ExternalTag
|
|
@@ -19,21 +20,21 @@ from datahub.api.entities.external.restricted_text import RestrictedText
|
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class LakeFormationTagKeyText(RestrictedText):
|
|
22
|
-
"""RestrictedText configured for
|
|
23
|
+
"""RestrictedText configured for Lake Formation tag keys."""
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
#
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
DEFAULT_MAX_LENGTH: ClassVar[int] = 50
|
|
26
|
+
# Lake Formation tag keys restrictions
|
|
27
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
|
|
28
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "" # No suffix for clean identifiers
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class LakeFormationTagValueText(RestrictedText):
|
|
31
|
-
"""RestrictedText configured for
|
|
32
|
+
"""RestrictedText configured for Lake Formation tag values."""
|
|
32
33
|
|
|
33
|
-
|
|
34
|
-
#
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
DEFAULT_MAX_LENGTH: ClassVar[int] = 50
|
|
35
|
+
# Lake Formation tag values restrictions
|
|
36
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
|
|
37
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
class LakeFormationTag(ExternalTag):
|
|
@@ -49,43 +50,43 @@ class LakeFormationTag(ExternalTag):
|
|
|
49
50
|
value: Optional[LakeFormationTagValueText] = None
|
|
50
51
|
catalog: Optional[str] = None
|
|
51
52
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
Initialize LakeFormation Tag from either a DataHub Tag URN or explicit key/value.
|
|
53
|
+
# Pydantic v1 validators
|
|
54
|
+
@validator("key", pre=True)
|
|
55
|
+
@classmethod
|
|
56
|
+
def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
|
|
57
|
+
"""Validate and convert key field for Pydantic v1."""
|
|
58
|
+
if isinstance(v, LakeFormationTagKeyText):
|
|
59
|
+
return v
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
61
|
+
# If we get a RestrictedText object from parent class validation, use its raw_text value
|
|
62
|
+
if hasattr(v, "raw_text"):
|
|
63
|
+
return LakeFormationTagKeyText(raw_text=v.raw_text)
|
|
64
|
+
|
|
65
|
+
return LakeFormationTagKeyText(raw_text=v)
|
|
66
|
+
|
|
67
|
+
@validator("value", pre=True)
|
|
68
|
+
@classmethod
|
|
69
|
+
def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]:
|
|
70
|
+
"""Validate and convert value field for Pydantic v1."""
|
|
71
|
+
if v is None:
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
if isinstance(v, LakeFormationTagValueText):
|
|
75
|
+
return v
|
|
76
|
+
|
|
77
|
+
# If we get a RestrictedText object from parent class validation, use its raw_text value
|
|
78
|
+
if hasattr(v, "raw_text"):
|
|
79
|
+
text_value = v.raw_text
|
|
80
|
+
# If value is an empty string, set it to None to not generate empty value in DataHub tag
|
|
81
|
+
if not str(text_value):
|
|
82
|
+
return None
|
|
83
|
+
return LakeFormationTagValueText(raw_text=text_value)
|
|
84
|
+
|
|
85
|
+
# If value is an empty string, set it to None to not generate empty value in DataHub tag
|
|
86
|
+
if not str(v):
|
|
87
|
+
return None
|
|
88
|
+
|
|
89
|
+
return LakeFormationTagValueText(raw_text=v)
|
|
89
90
|
|
|
90
91
|
def __eq__(self, other: object) -> bool:
|
|
91
92
|
"""Check equality based on key and value."""
|
|
@@ -137,9 +138,9 @@ class LakeFormationTag(ExternalTag):
|
|
|
137
138
|
Returns:
|
|
138
139
|
Dictionary with 'key' and optionally 'value'
|
|
139
140
|
"""
|
|
140
|
-
result: Dict[str, str] = {"key": self.key.
|
|
141
|
+
result: Dict[str, str] = {"key": self.key.raw_text}
|
|
141
142
|
if self.value is not None:
|
|
142
|
-
result["value"] = self.value.
|
|
143
|
+
result["value"] = self.value.raw_text
|
|
143
144
|
return result
|
|
144
145
|
|
|
145
146
|
def to_display_dict(self) -> Dict[str, str]:
|
|
@@ -11,41 +11,12 @@ Features:
|
|
|
11
11
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
|
-
from typing import
|
|
14
|
+
from typing import ClassVar, Optional, Set
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
try:
|
|
18
|
-
from pydantic import VERSION
|
|
16
|
+
from datahub.configuration.common import ConfigModel
|
|
19
17
|
|
|
20
|
-
PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2
|
|
21
|
-
except (ImportError, AttributeError):
|
|
22
|
-
# Fallback for older versions that don't have VERSION
|
|
23
|
-
PYDANTIC_V2 = False
|
|
24
18
|
|
|
25
|
-
|
|
26
|
-
from pydantic import GetCoreSchemaHandler # type: ignore[attr-defined]
|
|
27
|
-
from pydantic_core import core_schema
|
|
28
|
-
else:
|
|
29
|
-
from pydantic.validators import str_validator
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class RestrictedTextConfig:
|
|
33
|
-
"""Configuration class for RestrictedText."""
|
|
34
|
-
|
|
35
|
-
def __init__(
|
|
36
|
-
self,
|
|
37
|
-
max_length: Optional[int] = None,
|
|
38
|
-
forbidden_chars: Optional[Set[str]] = None,
|
|
39
|
-
replacement_char: Optional[str] = None,
|
|
40
|
-
truncation_suffix: Optional[str] = None,
|
|
41
|
-
):
|
|
42
|
-
self.max_length = max_length
|
|
43
|
-
self.forbidden_chars = forbidden_chars
|
|
44
|
-
self.replacement_char = replacement_char
|
|
45
|
-
self.truncation_suffix = truncation_suffix
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class RestrictedText(str):
|
|
19
|
+
class RestrictedText(ConfigModel):
|
|
49
20
|
"""A string type that stores the original value but returns a truncated and sanitized version.
|
|
50
21
|
|
|
51
22
|
This type allows you to:
|
|
@@ -60,8 +31,9 @@ class RestrictedText(str):
|
|
|
60
31
|
# Basic usage with default settings
|
|
61
32
|
name: RestrictedText
|
|
62
33
|
|
|
63
|
-
# Custom max length and character replacement
|
|
64
|
-
custom_field: RestrictedText = RestrictedText
|
|
34
|
+
# Custom max length and character replacement
|
|
35
|
+
custom_field: RestrictedText = RestrictedText(
|
|
36
|
+
text="hello-world.test",
|
|
65
37
|
max_length=10,
|
|
66
38
|
forbidden_chars={' ', '-', '.'},
|
|
67
39
|
replacement_char='_'
|
|
@@ -74,174 +46,127 @@ class RestrictedText(str):
|
|
|
74
46
|
)
|
|
75
47
|
|
|
76
48
|
print(model.name) # Truncated and sanitized version
|
|
77
|
-
print(model.name.
|
|
49
|
+
print(model.name.text) # Original value
|
|
78
50
|
print(model.custom_field) # "hello_worl..."
|
|
79
51
|
```
|
|
80
52
|
"""
|
|
81
53
|
|
|
82
54
|
# Default configuration
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
self.
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
55
|
+
DEFAULT_MAX_LENGTH: ClassVar[Optional[int]] = 50
|
|
56
|
+
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
|
|
57
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
|
|
58
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
|
|
59
|
+
|
|
60
|
+
raw_text: str
|
|
61
|
+
max_length: Optional[int] = None
|
|
62
|
+
forbidden_chars: Optional[Set[str]] = None
|
|
63
|
+
replacement_char: Optional[str] = None
|
|
64
|
+
truncation_suffix: Optional[str] = None
|
|
65
|
+
_processed_value: Optional[str] = None
|
|
66
|
+
|
|
67
|
+
def __init__(self, **data):
|
|
68
|
+
super().__init__(**data)
|
|
69
|
+
self.validate_text()
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def __get_validators__(cls):
|
|
73
|
+
yield cls.pydantic_accept_raw_text
|
|
74
|
+
yield cls.validate
|
|
75
|
+
yield cls.pydantic_validate_text
|
|
76
|
+
|
|
77
|
+
@classmethod
|
|
78
|
+
def pydantic_accept_raw_text(cls, v):
|
|
79
|
+
if isinstance(v, (RestrictedText, dict)):
|
|
80
|
+
return v
|
|
81
|
+
assert isinstance(v, str), "text must be a string"
|
|
82
|
+
return {"text": v}
|
|
83
|
+
|
|
84
|
+
@classmethod
|
|
85
|
+
def pydantic_validate_text(cls, v):
|
|
86
|
+
assert isinstance(v, RestrictedText)
|
|
87
|
+
assert v.validate_text()
|
|
88
|
+
return v
|
|
89
|
+
|
|
90
|
+
@classmethod
|
|
91
|
+
def validate(cls, v):
|
|
92
|
+
"""Validate and create a RestrictedText instance."""
|
|
93
|
+
if isinstance(v, RestrictedText):
|
|
94
|
+
return v
|
|
95
|
+
|
|
96
|
+
# This should be a dict at this point from pydantic_accept_raw_text
|
|
97
|
+
if isinstance(v, dict):
|
|
98
|
+
instance = cls(**v)
|
|
99
|
+
instance.validate_text()
|
|
100
|
+
return instance
|
|
101
|
+
|
|
102
|
+
raise ValueError(f"Unable to validate RestrictedText from {type(v)}")
|
|
103
|
+
|
|
104
|
+
def validate_text(self) -> bool:
|
|
105
|
+
"""Validate the text and apply restrictions."""
|
|
106
|
+
# Set defaults if not provided
|
|
107
|
+
max_length = (
|
|
108
|
+
self.max_length if self.max_length is not None else self.DEFAULT_MAX_LENGTH
|
|
109
|
+
)
|
|
110
|
+
forbidden_chars = (
|
|
111
|
+
self.forbidden_chars
|
|
112
|
+
if self.forbidden_chars is not None
|
|
113
|
+
else self.DEFAULT_FORBIDDEN_CHARS
|
|
114
|
+
)
|
|
115
|
+
replacement_char = (
|
|
116
|
+
self.replacement_char
|
|
117
|
+
if self.replacement_char is not None
|
|
118
|
+
else self.DEFAULT_REPLACEMENT_CHAR
|
|
119
|
+
)
|
|
120
|
+
truncation_suffix = (
|
|
121
|
+
self.truncation_suffix
|
|
122
|
+
if self.truncation_suffix is not None
|
|
123
|
+
else self.DEFAULT_TRUNCATION_SUFFIX
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# Store processed value
|
|
127
|
+
self._processed_value = self._process_value(
|
|
128
|
+
self.raw_text,
|
|
129
|
+
max_length,
|
|
130
|
+
forbidden_chars,
|
|
131
|
+
replacement_char,
|
|
132
|
+
truncation_suffix,
|
|
133
|
+
)
|
|
134
|
+
return True
|
|
135
|
+
|
|
136
|
+
def _process_value(
|
|
105
137
|
self,
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
if max_length is not None:
|
|
113
|
-
self.max_length = max_length
|
|
114
|
-
if forbidden_chars is not None:
|
|
115
|
-
self.forbidden_chars = forbidden_chars
|
|
116
|
-
if replacement_char is not None:
|
|
117
|
-
self.replacement_char = replacement_char
|
|
118
|
-
if truncation_suffix is not None:
|
|
119
|
-
self.truncation_suffix = truncation_suffix
|
|
120
|
-
|
|
121
|
-
# Reprocess the value with new configuration
|
|
122
|
-
self._processed_value = self._process_value(self.original)
|
|
123
|
-
return self
|
|
124
|
-
|
|
125
|
-
def _process_value(self, value: str) -> str:
|
|
138
|
+
value: str,
|
|
139
|
+
max_length: Optional[int],
|
|
140
|
+
forbidden_chars: Set[str],
|
|
141
|
+
replacement_char: str,
|
|
142
|
+
truncation_suffix: str,
|
|
143
|
+
) -> str:
|
|
126
144
|
"""Process the value by replacing characters and truncating."""
|
|
127
145
|
# Replace specified characters
|
|
128
146
|
processed = value
|
|
129
|
-
for char in
|
|
130
|
-
processed = processed.replace(char,
|
|
147
|
+
for char in forbidden_chars:
|
|
148
|
+
processed = processed.replace(char, replacement_char)
|
|
131
149
|
|
|
132
150
|
# Truncate if necessary
|
|
133
|
-
if
|
|
134
|
-
if len(
|
|
151
|
+
if max_length is not None and len(processed) > max_length:
|
|
152
|
+
if len(truncation_suffix) >= max_length:
|
|
135
153
|
# If suffix is too long, just truncate without suffix
|
|
136
|
-
processed = processed[:
|
|
154
|
+
processed = processed[:max_length]
|
|
137
155
|
else:
|
|
138
156
|
# Truncate and add suffix
|
|
139
|
-
truncate_length =
|
|
140
|
-
processed = processed[:truncate_length] +
|
|
157
|
+
truncate_length = max_length - len(truncation_suffix)
|
|
158
|
+
processed = processed[:truncate_length] + truncation_suffix
|
|
141
159
|
|
|
142
160
|
return processed
|
|
143
161
|
|
|
144
162
|
def __str__(self) -> str:
|
|
145
163
|
"""Return the processed (truncated and sanitized) value."""
|
|
146
|
-
return self._processed_value
|
|
164
|
+
return self._processed_value or ""
|
|
147
165
|
|
|
148
166
|
def __repr__(self) -> str:
|
|
149
|
-
return f"{self.__class__.__name__}({self.
|
|
167
|
+
return f"{self.__class__.__name__}({self.raw_text!r})"
|
|
150
168
|
|
|
151
169
|
@property
|
|
152
170
|
def processed(self) -> str:
|
|
153
171
|
"""Get the processed (truncated and sanitized) value."""
|
|
154
|
-
return self._processed_value
|
|
155
|
-
|
|
156
|
-
@classmethod
|
|
157
|
-
def with_config(
|
|
158
|
-
cls,
|
|
159
|
-
max_length: Optional[int] = None,
|
|
160
|
-
forbidden_chars: Optional[Set[str]] = None,
|
|
161
|
-
replacement_char: Optional[str] = None,
|
|
162
|
-
truncation_suffix: Optional[str] = None,
|
|
163
|
-
) -> RestrictedTextConfig:
|
|
164
|
-
"""Create a configuration object for use as field default.
|
|
165
|
-
|
|
166
|
-
Args:
|
|
167
|
-
max_length: Maximum length of the processed string
|
|
168
|
-
forbidden_chars: Set of characters to replace
|
|
169
|
-
replacement_char: Character to use as replacement
|
|
170
|
-
truncation_suffix: Suffix to add when truncating
|
|
171
|
-
|
|
172
|
-
Returns:
|
|
173
|
-
A configuration object that can be used as field default
|
|
174
|
-
"""
|
|
175
|
-
return RestrictedTextConfig(
|
|
176
|
-
max_length=max_length,
|
|
177
|
-
forbidden_chars=forbidden_chars,
|
|
178
|
-
replacement_char=replacement_char,
|
|
179
|
-
truncation_suffix=truncation_suffix,
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
# Pydantic v2 methods
|
|
183
|
-
if PYDANTIC_V2:
|
|
184
|
-
|
|
185
|
-
@classmethod
|
|
186
|
-
def _validate(
|
|
187
|
-
cls,
|
|
188
|
-
__input_value: Union[str, "RestrictedText"],
|
|
189
|
-
_: core_schema.ValidationInfo,
|
|
190
|
-
) -> "RestrictedText":
|
|
191
|
-
"""Validate and create a RestrictedText instance."""
|
|
192
|
-
if isinstance(__input_value, RestrictedText):
|
|
193
|
-
return __input_value
|
|
194
|
-
return cls(__input_value)
|
|
195
|
-
|
|
196
|
-
@classmethod
|
|
197
|
-
def __get_pydantic_core_schema__(
|
|
198
|
-
cls, source: type[Any], handler: GetCoreSchemaHandler
|
|
199
|
-
) -> core_schema.CoreSchema:
|
|
200
|
-
"""Get the Pydantic core schema for this type."""
|
|
201
|
-
return core_schema.with_info_after_validator_function(
|
|
202
|
-
cls._validate,
|
|
203
|
-
core_schema.str_schema(),
|
|
204
|
-
field_name=cls.__name__,
|
|
205
|
-
)
|
|
206
|
-
|
|
207
|
-
# Pydantic v1 methods
|
|
208
|
-
else:
|
|
209
|
-
|
|
210
|
-
@classmethod
|
|
211
|
-
def __get_validators__(cls):
|
|
212
|
-
"""Pydantic v1 validator method."""
|
|
213
|
-
yield cls.validate
|
|
214
|
-
|
|
215
|
-
@classmethod
|
|
216
|
-
def validate(cls, v, field=None):
|
|
217
|
-
"""Validate and create a RestrictedText instance for Pydantic v1."""
|
|
218
|
-
if isinstance(v, RestrictedText):
|
|
219
|
-
return v
|
|
220
|
-
|
|
221
|
-
if not isinstance(v, str):
|
|
222
|
-
# Let pydantic handle the string validation
|
|
223
|
-
v = str_validator(v)
|
|
224
|
-
|
|
225
|
-
# Create instance
|
|
226
|
-
instance = cls(v)
|
|
227
|
-
|
|
228
|
-
# Check if there's a field default that contains configuration
|
|
229
|
-
if (
|
|
230
|
-
field
|
|
231
|
-
and hasattr(field, "default")
|
|
232
|
-
and isinstance(field.default, RestrictedTextConfig)
|
|
233
|
-
):
|
|
234
|
-
config = field.default
|
|
235
|
-
instance._configure(
|
|
236
|
-
max_length=config.max_length,
|
|
237
|
-
forbidden_chars=config.forbidden_chars,
|
|
238
|
-
replacement_char=config.replacement_char,
|
|
239
|
-
truncation_suffix=config.truncation_suffix,
|
|
240
|
-
)
|
|
241
|
-
|
|
242
|
-
return instance
|
|
243
|
-
|
|
244
|
-
@classmethod
|
|
245
|
-
def __modify_schema__(cls, field_schema):
|
|
246
|
-
"""Modify the JSON schema for Pydantic v1."""
|
|
247
|
-
field_schema.update(type="string", examples=["example string"])
|
|
172
|
+
return self._processed_value or ""
|
|
@@ -10,8 +10,10 @@
|
|
|
10
10
|
# Tag search using the workspace search UI is supported only for tables, views, and table columns.
|
|
11
11
|
# Tag search requires exact term matching.
|
|
12
12
|
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
|
|
13
|
-
from typing import Any, Dict, Optional, Set
|
|
13
|
+
from typing import Any, Dict, Optional, Set
|
|
14
14
|
|
|
15
|
+
# Import validator for Pydantic v1 (always needed since we removed conditional logic)
|
|
16
|
+
from pydantic import validator
|
|
15
17
|
from typing_extensions import ClassVar
|
|
16
18
|
|
|
17
19
|
from datahub.api.entities.external.external_tag import ExternalTag
|
|
@@ -21,9 +23,9 @@ from datahub.api.entities.external.restricted_text import RestrictedText
|
|
|
21
23
|
class UnityCatalogTagKeyText(RestrictedText):
|
|
22
24
|
"""RestrictedText configured for Unity Catalog tag keys."""
|
|
23
25
|
|
|
24
|
-
|
|
25
|
-
# Unity Catalog tag keys:
|
|
26
|
-
|
|
26
|
+
DEFAULT_MAX_LENGTH: ClassVar[int] = 255
|
|
27
|
+
# Unity Catalog tag keys: forbidden characters based on constraints
|
|
28
|
+
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {
|
|
27
29
|
"\t",
|
|
28
30
|
"\n",
|
|
29
31
|
"\r",
|
|
@@ -34,18 +36,18 @@ class UnityCatalogTagKeyText(RestrictedText):
|
|
|
34
36
|
"/",
|
|
35
37
|
":",
|
|
36
38
|
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
|
|
40
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "" # No suffix for clean identifiers
|
|
39
41
|
|
|
40
42
|
|
|
41
43
|
class UnityCatalogTagValueText(RestrictedText):
|
|
42
44
|
"""RestrictedText configured for Unity Catalog tag values."""
|
|
43
45
|
|
|
44
|
-
|
|
46
|
+
DEFAULT_MAX_LENGTH: ClassVar[int] = 1000
|
|
45
47
|
# Unity Catalog tag values are more permissive but still have some restrictions
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
48
|
+
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {"\t", "\n", "\r"}
|
|
49
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
|
|
50
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
class UnityCatalogTag(ExternalTag):
|
|
@@ -60,46 +62,43 @@ class UnityCatalogTag(ExternalTag):
|
|
|
60
62
|
key: UnityCatalogTagKeyText
|
|
61
63
|
value: Optional[UnityCatalogTagValueText] = None
|
|
62
64
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
Initialize UnityCatalogTag from either a DataHub Tag URN or explicit key/value.
|
|
65
|
+
# Pydantic v1 validators
|
|
66
|
+
@validator("key", pre=True)
|
|
67
|
+
@classmethod
|
|
68
|
+
def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
|
|
69
|
+
"""Validate and convert key field for Pydantic v1."""
|
|
70
|
+
if isinstance(v, UnityCatalogTagKeyText):
|
|
71
|
+
return v
|
|
71
72
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# If value is an empty string, set it to None to not
|
|
92
|
-
if not str(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
# Standard pydantic initialization
|
|
102
|
-
super().__init__(**data)
|
|
73
|
+
# If we get a RestrictedText object from parent class validation, use its raw_text value
|
|
74
|
+
if hasattr(v, "raw_text"):
|
|
75
|
+
return UnityCatalogTagKeyText(raw_text=v.raw_text)
|
|
76
|
+
|
|
77
|
+
return UnityCatalogTagKeyText(raw_text=v)
|
|
78
|
+
|
|
79
|
+
@validator("value", pre=True)
|
|
80
|
+
@classmethod
|
|
81
|
+
def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
|
|
82
|
+
"""Validate and convert value field for Pydantic v1."""
|
|
83
|
+
if v is None:
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
if isinstance(v, UnityCatalogTagValueText):
|
|
87
|
+
return v
|
|
88
|
+
|
|
89
|
+
# If we get a RestrictedText object from parent class validation, use its raw_text value
|
|
90
|
+
if hasattr(v, "raw_text"):
|
|
91
|
+
text_value = v.raw_text
|
|
92
|
+
# If value is an empty string, set it to None to not generate empty value in DataHub tag
|
|
93
|
+
if not str(text_value):
|
|
94
|
+
return None
|
|
95
|
+
return UnityCatalogTagValueText(raw_text=text_value)
|
|
96
|
+
|
|
97
|
+
# If value is an empty string, set it to None to not generate empty value in DataHub tag
|
|
98
|
+
if not str(v):
|
|
99
|
+
return None
|
|
100
|
+
|
|
101
|
+
return UnityCatalogTagValueText(raw_text=v)
|
|
103
102
|
|
|
104
103
|
def __eq__(self, other: object) -> bool:
|
|
105
104
|
"""Check equality based on key and value."""
|
|
@@ -124,7 +123,7 @@ class UnityCatalogTag(ExternalTag):
|
|
|
124
123
|
Returns:
|
|
125
124
|
UnityCatalogTag instance
|
|
126
125
|
"""
|
|
127
|
-
return cls(
|
|
126
|
+
return cls(**tag_dict)
|
|
128
127
|
|
|
129
128
|
@classmethod
|
|
130
129
|
def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag":
|
|
@@ -149,9 +148,9 @@ class UnityCatalogTag(ExternalTag):
|
|
|
149
148
|
Returns:
|
|
150
149
|
Dictionary with 'key' and optionally 'value'
|
|
151
150
|
"""
|
|
152
|
-
result: Dict[str, str] = {"key": self.key.
|
|
151
|
+
result: Dict[str, str] = {"key": self.key.raw_text}
|
|
153
152
|
if self.value is not None:
|
|
154
|
-
result["value"] = self.value.
|
|
153
|
+
result["value"] = self.value.raw_text
|
|
155
154
|
return result
|
|
156
155
|
|
|
157
156
|
def to_display_dict(self) -> Dict[str, str]:
|
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
from typing import List, Optional, Union
|
|
6
6
|
|
|
7
7
|
import yaml
|
|
8
|
-
from pydantic import validator
|
|
8
|
+
from pydantic import Field, validator
|
|
9
9
|
from ruamel.yaml import YAML
|
|
10
10
|
from typing_extensions import Literal
|
|
11
11
|
|
|
@@ -67,7 +67,7 @@ class Prompt(ConfigModel):
|
|
|
67
67
|
description: Optional[str] = None
|
|
68
68
|
type: str
|
|
69
69
|
structured_property_id: Optional[str] = None
|
|
70
|
-
structured_property_urn: Optional[str] = None
|
|
70
|
+
structured_property_urn: Optional[str] = Field(default=None, validate_default=True)
|
|
71
71
|
required: Optional[bool] = None
|
|
72
72
|
|
|
73
73
|
@validator("structured_property_urn", pre=True, always=True)
|
|
@@ -111,7 +111,7 @@ class Actors(ConfigModel):
|
|
|
111
111
|
|
|
112
112
|
class Forms(ConfigModel):
|
|
113
113
|
id: Optional[str] = None
|
|
114
|
-
urn: Optional[str] = None
|
|
114
|
+
urn: Optional[str] = Field(default=None, validate_default=True)
|
|
115
115
|
name: str
|
|
116
116
|
description: Optional[str] = None
|
|
117
117
|
prompts: List[Prompt] = []
|
|
@@ -4,7 +4,7 @@ from pathlib import Path
|
|
|
4
4
|
from typing import Iterable, List, Optional, Union
|
|
5
5
|
|
|
6
6
|
import yaml
|
|
7
|
-
from pydantic import StrictStr, validator
|
|
7
|
+
from pydantic import Field, StrictStr, validator
|
|
8
8
|
from ruamel.yaml import YAML
|
|
9
9
|
|
|
10
10
|
from datahub.configuration.common import ConfigModel
|
|
@@ -68,7 +68,7 @@ class TypeQualifierAllowedTypes(ConfigModel):
|
|
|
68
68
|
|
|
69
69
|
class StructuredProperties(ConfigModel):
|
|
70
70
|
id: Optional[str] = None
|
|
71
|
-
urn: Optional[str] = None
|
|
71
|
+
urn: Optional[str] = Field(None, validate_default=True)
|
|
72
72
|
qualified_name: Optional[str] = None
|
|
73
73
|
type: str
|
|
74
74
|
value_entity_types: Optional[List[str]] = None
|