acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/METADATA +2522 -2522
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/RECORD +32 -32
- datahub/_version.py +1 -1
- datahub/api/entities/external/external_tag.py +6 -4
- datahub/api/entities/external/lake_formation_external_entites.py +50 -49
- datahub/api/entities/external/restricted_text.py +107 -182
- datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
- datahub/ingestion/api/source.py +81 -7
- datahub/ingestion/autogenerated/capability_summary.json +47 -19
- datahub/ingestion/source/abs/source.py +9 -0
- datahub/ingestion/source/aws/glue.py +18 -2
- datahub/ingestion/source/aws/tag_entities.py +2 -2
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/delta_lake/source.py +8 -1
- datahub/ingestion/source/dremio/dremio_source.py +19 -2
- datahub/ingestion/source/fivetran/fivetran.py +9 -3
- datahub/ingestion/source/ge_data_profiler.py +8 -0
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
- datahub/ingestion/source/powerbi/powerbi.py +4 -1
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/salesforce.py +8 -0
- datahub/ingestion/source/sql/hive_metastore.py +8 -0
- datahub/ingestion/source/sql/teradata.py +8 -1
- datahub/ingestion/source/sql/trino.py +9 -0
- datahub/ingestion/source/unity/tag_entities.py +3 -3
- datahub/sdk/entity_client.py +22 -7
- datahub/utilities/mapping.py +29 -2
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/top_level.txt +0 -0
|
@@ -11,41 +11,12 @@ Features:
|
|
|
11
11
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
|
-
from typing import
|
|
14
|
+
from typing import ClassVar, Optional, Set
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
try:
|
|
18
|
-
from pydantic import VERSION
|
|
16
|
+
from datahub.configuration.common import ConfigModel
|
|
19
17
|
|
|
20
|
-
PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2
|
|
21
|
-
except (ImportError, AttributeError):
|
|
22
|
-
# Fallback for older versions that don't have VERSION
|
|
23
|
-
PYDANTIC_V2 = False
|
|
24
18
|
|
|
25
|
-
|
|
26
|
-
from pydantic import GetCoreSchemaHandler # type: ignore[attr-defined]
|
|
27
|
-
from pydantic_core import core_schema
|
|
28
|
-
else:
|
|
29
|
-
from pydantic.validators import str_validator
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class RestrictedTextConfig:
|
|
33
|
-
"""Configuration class for RestrictedText."""
|
|
34
|
-
|
|
35
|
-
def __init__(
|
|
36
|
-
self,
|
|
37
|
-
max_length: Optional[int] = None,
|
|
38
|
-
forbidden_chars: Optional[Set[str]] = None,
|
|
39
|
-
replacement_char: Optional[str] = None,
|
|
40
|
-
truncation_suffix: Optional[str] = None,
|
|
41
|
-
):
|
|
42
|
-
self.max_length = max_length
|
|
43
|
-
self.forbidden_chars = forbidden_chars
|
|
44
|
-
self.replacement_char = replacement_char
|
|
45
|
-
self.truncation_suffix = truncation_suffix
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class RestrictedText(str):
|
|
19
|
+
class RestrictedText(ConfigModel):
|
|
49
20
|
"""A string type that stores the original value but returns a truncated and sanitized version.
|
|
50
21
|
|
|
51
22
|
This type allows you to:
|
|
@@ -60,8 +31,9 @@ class RestrictedText(str):
|
|
|
60
31
|
# Basic usage with default settings
|
|
61
32
|
name: RestrictedText
|
|
62
33
|
|
|
63
|
-
# Custom max length and character replacement
|
|
64
|
-
custom_field: RestrictedText = RestrictedText
|
|
34
|
+
# Custom max length and character replacement
|
|
35
|
+
custom_field: RestrictedText = RestrictedText(
|
|
36
|
+
text="hello-world.test",
|
|
65
37
|
max_length=10,
|
|
66
38
|
forbidden_chars={' ', '-', '.'},
|
|
67
39
|
replacement_char='_'
|
|
@@ -73,175 +45,128 @@ class RestrictedText(str):
|
|
|
73
45
|
custom_field="hello-world.test"
|
|
74
46
|
)
|
|
75
47
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
48
|
+
# model.name returns truncated and sanitized version
|
|
49
|
+
# model.name.raw_text returns original value
|
|
50
|
+
# model.custom_field returns "hello_worl..."
|
|
79
51
|
```
|
|
80
52
|
"""
|
|
81
53
|
|
|
82
54
|
# Default configuration
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
self.
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
55
|
+
DEFAULT_MAX_LENGTH: ClassVar[Optional[int]] = 50
|
|
56
|
+
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
|
|
57
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
|
|
58
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
|
|
59
|
+
|
|
60
|
+
raw_text: str
|
|
61
|
+
max_length: Optional[int] = None
|
|
62
|
+
forbidden_chars: Optional[Set[str]] = None
|
|
63
|
+
replacement_char: Optional[str] = None
|
|
64
|
+
truncation_suffix: Optional[str] = None
|
|
65
|
+
_processed_value: Optional[str] = None
|
|
66
|
+
|
|
67
|
+
def __init__(self, **data):
|
|
68
|
+
super().__init__(**data)
|
|
69
|
+
self.validate_text()
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def __get_validators__(cls):
|
|
73
|
+
yield cls.pydantic_accept_raw_text
|
|
74
|
+
yield cls.validate
|
|
75
|
+
yield cls.pydantic_validate_text
|
|
76
|
+
|
|
77
|
+
@classmethod
|
|
78
|
+
def pydantic_accept_raw_text(cls, v):
|
|
79
|
+
if isinstance(v, (RestrictedText, dict)):
|
|
80
|
+
return v
|
|
81
|
+
assert isinstance(v, str), "text must be a string"
|
|
82
|
+
return {"text": v}
|
|
83
|
+
|
|
84
|
+
@classmethod
|
|
85
|
+
def pydantic_validate_text(cls, v):
|
|
86
|
+
assert isinstance(v, RestrictedText)
|
|
87
|
+
assert v.validate_text()
|
|
88
|
+
return v
|
|
89
|
+
|
|
90
|
+
@classmethod
|
|
91
|
+
def validate(cls, v):
|
|
92
|
+
"""Validate and create a RestrictedText instance."""
|
|
93
|
+
if isinstance(v, RestrictedText):
|
|
94
|
+
return v
|
|
95
|
+
|
|
96
|
+
# This should be a dict at this point from pydantic_accept_raw_text
|
|
97
|
+
if isinstance(v, dict):
|
|
98
|
+
instance = cls(**v)
|
|
99
|
+
instance.validate_text()
|
|
100
|
+
return instance
|
|
101
|
+
|
|
102
|
+
raise ValueError(f"Unable to validate RestrictedText from {type(v)}")
|
|
103
|
+
|
|
104
|
+
def validate_text(self) -> bool:
|
|
105
|
+
"""Validate the text and apply restrictions."""
|
|
106
|
+
# Set defaults if not provided
|
|
107
|
+
max_length = (
|
|
108
|
+
self.max_length if self.max_length is not None else self.DEFAULT_MAX_LENGTH
|
|
109
|
+
)
|
|
110
|
+
forbidden_chars = (
|
|
111
|
+
self.forbidden_chars
|
|
112
|
+
if self.forbidden_chars is not None
|
|
113
|
+
else self.DEFAULT_FORBIDDEN_CHARS
|
|
114
|
+
)
|
|
115
|
+
replacement_char = (
|
|
116
|
+
self.replacement_char
|
|
117
|
+
if self.replacement_char is not None
|
|
118
|
+
else self.DEFAULT_REPLACEMENT_CHAR
|
|
119
|
+
)
|
|
120
|
+
truncation_suffix = (
|
|
121
|
+
self.truncation_suffix
|
|
122
|
+
if self.truncation_suffix is not None
|
|
123
|
+
else self.DEFAULT_TRUNCATION_SUFFIX
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# Store processed value
|
|
127
|
+
self._processed_value = self._process_value(
|
|
128
|
+
self.raw_text,
|
|
129
|
+
max_length,
|
|
130
|
+
forbidden_chars,
|
|
131
|
+
replacement_char,
|
|
132
|
+
truncation_suffix,
|
|
133
|
+
)
|
|
134
|
+
return True
|
|
135
|
+
|
|
136
|
+
def _process_value(
|
|
105
137
|
self,
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
if max_length is not None:
|
|
113
|
-
self.max_length = max_length
|
|
114
|
-
if forbidden_chars is not None:
|
|
115
|
-
self.forbidden_chars = forbidden_chars
|
|
116
|
-
if replacement_char is not None:
|
|
117
|
-
self.replacement_char = replacement_char
|
|
118
|
-
if truncation_suffix is not None:
|
|
119
|
-
self.truncation_suffix = truncation_suffix
|
|
120
|
-
|
|
121
|
-
# Reprocess the value with new configuration
|
|
122
|
-
self._processed_value = self._process_value(self.original)
|
|
123
|
-
return self
|
|
124
|
-
|
|
125
|
-
def _process_value(self, value: str) -> str:
|
|
138
|
+
value: str,
|
|
139
|
+
max_length: Optional[int],
|
|
140
|
+
forbidden_chars: Set[str],
|
|
141
|
+
replacement_char: str,
|
|
142
|
+
truncation_suffix: str,
|
|
143
|
+
) -> str:
|
|
126
144
|
"""Process the value by replacing characters and truncating."""
|
|
127
145
|
# Replace specified characters
|
|
128
146
|
processed = value
|
|
129
|
-
for char in
|
|
130
|
-
processed = processed.replace(char,
|
|
147
|
+
for char in forbidden_chars:
|
|
148
|
+
processed = processed.replace(char, replacement_char)
|
|
131
149
|
|
|
132
150
|
# Truncate if necessary
|
|
133
|
-
if
|
|
134
|
-
if len(
|
|
151
|
+
if max_length is not None and len(processed) > max_length:
|
|
152
|
+
if len(truncation_suffix) >= max_length:
|
|
135
153
|
# If suffix is too long, just truncate without suffix
|
|
136
|
-
processed = processed[:
|
|
154
|
+
processed = processed[:max_length]
|
|
137
155
|
else:
|
|
138
156
|
# Truncate and add suffix
|
|
139
|
-
truncate_length =
|
|
140
|
-
processed = processed[:truncate_length] +
|
|
157
|
+
truncate_length = max_length - len(truncation_suffix)
|
|
158
|
+
processed = processed[:truncate_length] + truncation_suffix
|
|
141
159
|
|
|
142
160
|
return processed
|
|
143
161
|
|
|
144
162
|
def __str__(self) -> str:
|
|
145
163
|
"""Return the processed (truncated and sanitized) value."""
|
|
146
|
-
return self._processed_value
|
|
164
|
+
return self._processed_value or ""
|
|
147
165
|
|
|
148
166
|
def __repr__(self) -> str:
|
|
149
|
-
return f"{self.__class__.__name__}({self.
|
|
167
|
+
return f"{self.__class__.__name__}({self.raw_text!r})"
|
|
150
168
|
|
|
151
169
|
@property
|
|
152
170
|
def processed(self) -> str:
|
|
153
171
|
"""Get the processed (truncated and sanitized) value."""
|
|
154
|
-
return self._processed_value
|
|
155
|
-
|
|
156
|
-
@classmethod
|
|
157
|
-
def with_config(
|
|
158
|
-
cls,
|
|
159
|
-
max_length: Optional[int] = None,
|
|
160
|
-
forbidden_chars: Optional[Set[str]] = None,
|
|
161
|
-
replacement_char: Optional[str] = None,
|
|
162
|
-
truncation_suffix: Optional[str] = None,
|
|
163
|
-
) -> RestrictedTextConfig:
|
|
164
|
-
"""Create a configuration object for use as field default.
|
|
165
|
-
|
|
166
|
-
Args:
|
|
167
|
-
max_length: Maximum length of the processed string
|
|
168
|
-
forbidden_chars: Set of characters to replace
|
|
169
|
-
replacement_char: Character to use as replacement
|
|
170
|
-
truncation_suffix: Suffix to add when truncating
|
|
171
|
-
|
|
172
|
-
Returns:
|
|
173
|
-
A configuration object that can be used as field default
|
|
174
|
-
"""
|
|
175
|
-
return RestrictedTextConfig(
|
|
176
|
-
max_length=max_length,
|
|
177
|
-
forbidden_chars=forbidden_chars,
|
|
178
|
-
replacement_char=replacement_char,
|
|
179
|
-
truncation_suffix=truncation_suffix,
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
# Pydantic v2 methods
|
|
183
|
-
if PYDANTIC_V2:
|
|
184
|
-
|
|
185
|
-
@classmethod
|
|
186
|
-
def _validate(
|
|
187
|
-
cls,
|
|
188
|
-
__input_value: Union[str, "RestrictedText"],
|
|
189
|
-
_: core_schema.ValidationInfo,
|
|
190
|
-
) -> "RestrictedText":
|
|
191
|
-
"""Validate and create a RestrictedText instance."""
|
|
192
|
-
if isinstance(__input_value, RestrictedText):
|
|
193
|
-
return __input_value
|
|
194
|
-
return cls(__input_value)
|
|
195
|
-
|
|
196
|
-
@classmethod
|
|
197
|
-
def __get_pydantic_core_schema__(
|
|
198
|
-
cls, source: type[Any], handler: GetCoreSchemaHandler
|
|
199
|
-
) -> core_schema.CoreSchema:
|
|
200
|
-
"""Get the Pydantic core schema for this type."""
|
|
201
|
-
return core_schema.with_info_after_validator_function(
|
|
202
|
-
cls._validate,
|
|
203
|
-
core_schema.str_schema(),
|
|
204
|
-
field_name=cls.__name__,
|
|
205
|
-
)
|
|
206
|
-
|
|
207
|
-
# Pydantic v1 methods
|
|
208
|
-
else:
|
|
209
|
-
|
|
210
|
-
@classmethod
|
|
211
|
-
def __get_validators__(cls):
|
|
212
|
-
"""Pydantic v1 validator method."""
|
|
213
|
-
yield cls.validate
|
|
214
|
-
|
|
215
|
-
@classmethod
|
|
216
|
-
def validate(cls, v, field=None):
|
|
217
|
-
"""Validate and create a RestrictedText instance for Pydantic v1."""
|
|
218
|
-
if isinstance(v, RestrictedText):
|
|
219
|
-
return v
|
|
220
|
-
|
|
221
|
-
if not isinstance(v, str):
|
|
222
|
-
# Let pydantic handle the string validation
|
|
223
|
-
v = str_validator(v)
|
|
224
|
-
|
|
225
|
-
# Create instance
|
|
226
|
-
instance = cls(v)
|
|
227
|
-
|
|
228
|
-
# Check if there's a field default that contains configuration
|
|
229
|
-
if (
|
|
230
|
-
field
|
|
231
|
-
and hasattr(field, "default")
|
|
232
|
-
and isinstance(field.default, RestrictedTextConfig)
|
|
233
|
-
):
|
|
234
|
-
config = field.default
|
|
235
|
-
instance._configure(
|
|
236
|
-
max_length=config.max_length,
|
|
237
|
-
forbidden_chars=config.forbidden_chars,
|
|
238
|
-
replacement_char=config.replacement_char,
|
|
239
|
-
truncation_suffix=config.truncation_suffix,
|
|
240
|
-
)
|
|
241
|
-
|
|
242
|
-
return instance
|
|
243
|
-
|
|
244
|
-
@classmethod
|
|
245
|
-
def __modify_schema__(cls, field_schema):
|
|
246
|
-
"""Modify the JSON schema for Pydantic v1."""
|
|
247
|
-
field_schema.update(type="string", examples=["example string"])
|
|
172
|
+
return self._processed_value or ""
|
|
@@ -10,8 +10,10 @@
|
|
|
10
10
|
# Tag search using the workspace search UI is supported only for tables, views, and table columns.
|
|
11
11
|
# Tag search requires exact term matching.
|
|
12
12
|
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
|
|
13
|
-
from typing import Any, Dict, Optional, Set
|
|
13
|
+
from typing import Any, Dict, Optional, Set
|
|
14
14
|
|
|
15
|
+
# Import validator for Pydantic v1 (always needed since we removed conditional logic)
|
|
16
|
+
from pydantic import validator
|
|
15
17
|
from typing_extensions import ClassVar
|
|
16
18
|
|
|
17
19
|
from datahub.api.entities.external.external_tag import ExternalTag
|
|
@@ -21,9 +23,9 @@ from datahub.api.entities.external.restricted_text import RestrictedText
|
|
|
21
23
|
class UnityCatalogTagKeyText(RestrictedText):
|
|
22
24
|
"""RestrictedText configured for Unity Catalog tag keys."""
|
|
23
25
|
|
|
24
|
-
|
|
25
|
-
# Unity Catalog tag keys:
|
|
26
|
-
|
|
26
|
+
DEFAULT_MAX_LENGTH: ClassVar[int] = 255
|
|
27
|
+
# Unity Catalog tag keys: forbidden characters based on constraints
|
|
28
|
+
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {
|
|
27
29
|
"\t",
|
|
28
30
|
"\n",
|
|
29
31
|
"\r",
|
|
@@ -34,18 +36,18 @@ class UnityCatalogTagKeyText(RestrictedText):
|
|
|
34
36
|
"/",
|
|
35
37
|
":",
|
|
36
38
|
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
|
|
40
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "" # No suffix for clean identifiers
|
|
39
41
|
|
|
40
42
|
|
|
41
43
|
class UnityCatalogTagValueText(RestrictedText):
|
|
42
44
|
"""RestrictedText configured for Unity Catalog tag values."""
|
|
43
45
|
|
|
44
|
-
|
|
46
|
+
DEFAULT_MAX_LENGTH: ClassVar[int] = 1000
|
|
45
47
|
# Unity Catalog tag values are more permissive but still have some restrictions
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
48
|
+
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {"\t", "\n", "\r"}
|
|
49
|
+
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
|
|
50
|
+
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
class UnityCatalogTag(ExternalTag):
|
|
@@ -60,46 +62,43 @@ class UnityCatalogTag(ExternalTag):
|
|
|
60
62
|
key: UnityCatalogTagKeyText
|
|
61
63
|
value: Optional[UnityCatalogTagValueText] = None
|
|
62
64
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
Initialize UnityCatalogTag from either a DataHub Tag URN or explicit key/value.
|
|
65
|
+
# Pydantic v1 validators
|
|
66
|
+
@validator("key", pre=True)
|
|
67
|
+
@classmethod
|
|
68
|
+
def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
|
|
69
|
+
"""Validate and convert key field for Pydantic v1."""
|
|
70
|
+
if isinstance(v, UnityCatalogTagKeyText):
|
|
71
|
+
return v
|
|
71
72
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# If value is an empty string, set it to None to not
|
|
92
|
-
if not str(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
# Standard pydantic initialization
|
|
102
|
-
super().__init__(**data)
|
|
73
|
+
# If we get a RestrictedText object from parent class validation, use its raw_text value
|
|
74
|
+
if hasattr(v, "raw_text"):
|
|
75
|
+
return UnityCatalogTagKeyText(raw_text=v.raw_text)
|
|
76
|
+
|
|
77
|
+
return UnityCatalogTagKeyText(raw_text=v)
|
|
78
|
+
|
|
79
|
+
@validator("value", pre=True)
|
|
80
|
+
@classmethod
|
|
81
|
+
def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
|
|
82
|
+
"""Validate and convert value field for Pydantic v1."""
|
|
83
|
+
if v is None:
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
if isinstance(v, UnityCatalogTagValueText):
|
|
87
|
+
return v
|
|
88
|
+
|
|
89
|
+
# If we get a RestrictedText object from parent class validation, use its raw_text value
|
|
90
|
+
if hasattr(v, "raw_text"):
|
|
91
|
+
text_value = v.raw_text
|
|
92
|
+
# If value is an empty string, set it to None to not generate empty value in DataHub tag
|
|
93
|
+
if not str(text_value):
|
|
94
|
+
return None
|
|
95
|
+
return UnityCatalogTagValueText(raw_text=text_value)
|
|
96
|
+
|
|
97
|
+
# If value is an empty string, set it to None to not generate empty value in DataHub tag
|
|
98
|
+
if not str(v):
|
|
99
|
+
return None
|
|
100
|
+
|
|
101
|
+
return UnityCatalogTagValueText(raw_text=v)
|
|
103
102
|
|
|
104
103
|
def __eq__(self, other: object) -> bool:
|
|
105
104
|
"""Check equality based on key and value."""
|
|
@@ -124,7 +123,7 @@ class UnityCatalogTag(ExternalTag):
|
|
|
124
123
|
Returns:
|
|
125
124
|
UnityCatalogTag instance
|
|
126
125
|
"""
|
|
127
|
-
return cls(
|
|
126
|
+
return cls(**tag_dict)
|
|
128
127
|
|
|
129
128
|
@classmethod
|
|
130
129
|
def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag":
|
|
@@ -149,9 +148,9 @@ class UnityCatalogTag(ExternalTag):
|
|
|
149
148
|
Returns:
|
|
150
149
|
Dictionary with 'key' and optionally 'value'
|
|
151
150
|
"""
|
|
152
|
-
result: Dict[str, str] = {"key": self.key.
|
|
151
|
+
result: Dict[str, str] = {"key": self.key.raw_text}
|
|
153
152
|
if self.value is not None:
|
|
154
|
-
result["value"] = self.value.
|
|
153
|
+
result["value"] = self.value.raw_text
|
|
155
154
|
return result
|
|
156
155
|
|
|
157
156
|
def to_display_dict(self) -> Dict[str, str]:
|
datahub/ingestion/api/source.py
CHANGED
|
@@ -81,11 +81,24 @@ class StructuredLogLevel(Enum):
|
|
|
81
81
|
ERROR = logging.ERROR
|
|
82
82
|
|
|
83
83
|
|
|
84
|
+
class StructuredLogCategory(Enum):
|
|
85
|
+
"""
|
|
86
|
+
This is used to categorise the errors mainly based on the biggest impact area
|
|
87
|
+
This is to be used to help in self-serve understand the impact of any log entry
|
|
88
|
+
More enums to be added as logs are updated to be self-serve
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
LINEAGE = "LINEAGE"
|
|
92
|
+
USAGE = "USAGE"
|
|
93
|
+
PROFILING = "PROFILING"
|
|
94
|
+
|
|
95
|
+
|
|
84
96
|
@dataclass
|
|
85
97
|
class StructuredLogEntry(Report):
|
|
86
98
|
title: Optional[str]
|
|
87
99
|
message: str
|
|
88
100
|
context: LossyList[str]
|
|
101
|
+
log_category: Optional[StructuredLogCategory] = None
|
|
89
102
|
|
|
90
103
|
|
|
91
104
|
@dataclass
|
|
@@ -108,9 +121,10 @@ class StructuredLogs(Report):
|
|
|
108
121
|
exc: Optional[BaseException] = None,
|
|
109
122
|
log: bool = False,
|
|
110
123
|
stacklevel: int = 1,
|
|
124
|
+
log_category: Optional[StructuredLogCategory] = None,
|
|
111
125
|
) -> None:
|
|
112
126
|
"""
|
|
113
|
-
Report a user-facing
|
|
127
|
+
Report a user-facing log for the ingestion run.
|
|
114
128
|
|
|
115
129
|
Args:
|
|
116
130
|
level: The level of the log entry.
|
|
@@ -118,6 +132,9 @@ class StructuredLogs(Report):
|
|
|
118
132
|
title: The category / heading to present on for this message in the UI.
|
|
119
133
|
context: Additional context (e.g. where, how) for the log entry.
|
|
120
134
|
exc: The exception associated with the event. We'll show the stack trace when in debug mode.
|
|
135
|
+
log_category: The type of the log entry. This is used to categorise the log entry.
|
|
136
|
+
log: Whether to log the entry to the console.
|
|
137
|
+
stacklevel: The stack level to use for the log entry.
|
|
121
138
|
"""
|
|
122
139
|
|
|
123
140
|
# One for this method, and one for the containing report_* call.
|
|
@@ -160,6 +177,7 @@ class StructuredLogs(Report):
|
|
|
160
177
|
title=title,
|
|
161
178
|
message=message,
|
|
162
179
|
context=context_list,
|
|
180
|
+
log_category=log_category,
|
|
163
181
|
)
|
|
164
182
|
else:
|
|
165
183
|
if context is not None:
|
|
@@ -219,9 +237,19 @@ class SourceReport(ExamplesReport):
|
|
|
219
237
|
context: Optional[str] = None,
|
|
220
238
|
title: Optional[LiteralString] = None,
|
|
221
239
|
exc: Optional[BaseException] = None,
|
|
240
|
+
log_category: Optional[StructuredLogCategory] = None,
|
|
222
241
|
) -> None:
|
|
242
|
+
"""
|
|
243
|
+
See docs of StructuredLogs.report_log for details of args
|
|
244
|
+
"""
|
|
223
245
|
self._structured_logs.report_log(
|
|
224
|
-
StructuredLogLevel.WARN,
|
|
246
|
+
StructuredLogLevel.WARN,
|
|
247
|
+
message,
|
|
248
|
+
title,
|
|
249
|
+
context,
|
|
250
|
+
exc,
|
|
251
|
+
log=False,
|
|
252
|
+
log_category=log_category,
|
|
225
253
|
)
|
|
226
254
|
|
|
227
255
|
def warning(
|
|
@@ -231,9 +259,19 @@ class SourceReport(ExamplesReport):
|
|
|
231
259
|
title: Optional[LiteralString] = None,
|
|
232
260
|
exc: Optional[BaseException] = None,
|
|
233
261
|
log: bool = True,
|
|
262
|
+
log_category: Optional[StructuredLogCategory] = None,
|
|
234
263
|
) -> None:
|
|
264
|
+
"""
|
|
265
|
+
See docs of StructuredLogs.report_log for details of args
|
|
266
|
+
"""
|
|
235
267
|
self._structured_logs.report_log(
|
|
236
|
-
StructuredLogLevel.WARN,
|
|
268
|
+
StructuredLogLevel.WARN,
|
|
269
|
+
message,
|
|
270
|
+
title,
|
|
271
|
+
context,
|
|
272
|
+
exc,
|
|
273
|
+
log=log,
|
|
274
|
+
log_category=log_category,
|
|
237
275
|
)
|
|
238
276
|
|
|
239
277
|
def report_failure(
|
|
@@ -243,9 +281,19 @@ class SourceReport(ExamplesReport):
|
|
|
243
281
|
title: Optional[LiteralString] = None,
|
|
244
282
|
exc: Optional[BaseException] = None,
|
|
245
283
|
log: bool = True,
|
|
284
|
+
log_category: Optional[StructuredLogCategory] = None,
|
|
246
285
|
) -> None:
|
|
286
|
+
"""
|
|
287
|
+
See docs of StructuredLogs.report_log for details of args
|
|
288
|
+
"""
|
|
247
289
|
self._structured_logs.report_log(
|
|
248
|
-
StructuredLogLevel.ERROR,
|
|
290
|
+
StructuredLogLevel.ERROR,
|
|
291
|
+
message,
|
|
292
|
+
title,
|
|
293
|
+
context,
|
|
294
|
+
exc,
|
|
295
|
+
log=log,
|
|
296
|
+
log_category=log_category,
|
|
249
297
|
)
|
|
250
298
|
|
|
251
299
|
def failure(
|
|
@@ -255,9 +303,19 @@ class SourceReport(ExamplesReport):
|
|
|
255
303
|
title: Optional[LiteralString] = None,
|
|
256
304
|
exc: Optional[BaseException] = None,
|
|
257
305
|
log: bool = True,
|
|
306
|
+
log_category: Optional[StructuredLogCategory] = None,
|
|
258
307
|
) -> None:
|
|
308
|
+
"""
|
|
309
|
+
See docs of StructuredLogs.report_log for details of args
|
|
310
|
+
"""
|
|
259
311
|
self._structured_logs.report_log(
|
|
260
|
-
StructuredLogLevel.ERROR,
|
|
312
|
+
StructuredLogLevel.ERROR,
|
|
313
|
+
message,
|
|
314
|
+
title,
|
|
315
|
+
context,
|
|
316
|
+
exc,
|
|
317
|
+
log=log,
|
|
318
|
+
log_category=log_category,
|
|
261
319
|
)
|
|
262
320
|
|
|
263
321
|
def info(
|
|
@@ -267,9 +325,19 @@ class SourceReport(ExamplesReport):
|
|
|
267
325
|
title: Optional[LiteralString] = None,
|
|
268
326
|
exc: Optional[BaseException] = None,
|
|
269
327
|
log: bool = True,
|
|
328
|
+
log_category: Optional[StructuredLogCategory] = None,
|
|
270
329
|
) -> None:
|
|
330
|
+
"""
|
|
331
|
+
See docs of StructuredLogs.report_log for details of args
|
|
332
|
+
"""
|
|
271
333
|
self._structured_logs.report_log(
|
|
272
|
-
StructuredLogLevel.INFO,
|
|
334
|
+
StructuredLogLevel.INFO,
|
|
335
|
+
message,
|
|
336
|
+
title,
|
|
337
|
+
context,
|
|
338
|
+
exc,
|
|
339
|
+
log=log,
|
|
340
|
+
log_category=log_category,
|
|
273
341
|
)
|
|
274
342
|
|
|
275
343
|
@contextlib.contextmanager
|
|
@@ -279,6 +347,7 @@ class SourceReport(ExamplesReport):
|
|
|
279
347
|
title: Optional[LiteralString] = None,
|
|
280
348
|
context: Optional[str] = None,
|
|
281
349
|
level: StructuredLogLevel = StructuredLogLevel.ERROR,
|
|
350
|
+
log_category: Optional[StructuredLogCategory] = None,
|
|
282
351
|
) -> Iterator[None]:
|
|
283
352
|
# Convenience method that helps avoid boilerplate try/except blocks.
|
|
284
353
|
# TODO: I'm not super happy with the naming here - it's not obvious that this
|
|
@@ -287,7 +356,12 @@ class SourceReport(ExamplesReport):
|
|
|
287
356
|
yield
|
|
288
357
|
except Exception as exc:
|
|
289
358
|
self._structured_logs.report_log(
|
|
290
|
-
level,
|
|
359
|
+
level,
|
|
360
|
+
message=message,
|
|
361
|
+
title=title,
|
|
362
|
+
context=context,
|
|
363
|
+
exc=exc,
|
|
364
|
+
log_category=log_category,
|
|
291
365
|
)
|
|
292
366
|
|
|
293
367
|
def __post_init__(self) -> None:
|